RDKit
Open-source cheminformatics and machine learning.
O3AAlignMolecules.h
Go to the documentation of this file.
1 // $Id$
2 //
3 // Copyright (C) 2013-2014 Paolo Tosco
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #ifndef _RD_O3AALIGNMOLECULES_H_
12 #define _RD_O3AALIGNMOLECULES_H_
13 
14 #include <RDGeneral/Invariant.h>
15 #include <Geometry/Transform3D.h>
16 #include <Geometry/point.h>
17 #include <Numerics/Vector.h>
18 #include <GraphMol/ROMol.h>
19 #include <GraphMol/Conformer.h>
22 #include <vector>
23 #include <cmath>
24 #include <boost/shared_ptr.hpp>
25 #include <boost/multi_array.hpp>
26 #include <boost/dynamic_bitset.hpp>
27 
28 namespace RDKit {
29  namespace MolAlign {
30  typedef struct O3AFuncData {
33  void *prbProp;
34  void *refProp;
35  int coeff;
36  int weight;
37  bool useMMFFSim;
38  } O3AFuncData;
39  inline bool isDoubleZero(const double x) {
40  return ((x < 1.0e-10) && (x > -1.0e-10));
41  };
42 
43  class O3AConstraintVect;
44 
45  //! A class to define alignment constraints. Each constraint
46  //! is defined by a pair of atom indexes (one for the probe,
47  //! one for the reference) and a weight. Constraints can
48  //! can be added via the O3AConstraintVect class.
49  class O3AConstraint {
50  friend class O3AConstraintVect;
51  public:
52  double getPrbIdx() {
53  return d_prbIdx;
54  }
55  double getRefIdx() {
56  return d_refIdx;
57  }
58  double getWeight() {
59  return d_weight;
60  }
61  private:
62  unsigned int d_idx;
63  unsigned int d_prbIdx;
64  unsigned int d_refIdx;
65  double d_weight;
66  };
67 
68  //! A class to store a vector of alignment constraints. Each constraint
69  //! is defined by an O3AConstraint object. Each time the append()
70  //! method is invoked, the vector is sorted to make lookup faster.
71  //! Hence, constraints are not necessarily stored in the same order
72  //! they were appended.
74  public:
76  d_count(0) {};
78  void append(unsigned int prbIdx, unsigned int refIdx, double weight) {
79  O3AConstraint *o3aConstraint = new O3AConstraint();
80  o3aConstraint->d_idx = d_count;
81  o3aConstraint->d_prbIdx = prbIdx;
82  o3aConstraint->d_refIdx = refIdx;
83  o3aConstraint->d_weight = weight;
84  d_o3aConstraintVect.push_back(boost::shared_ptr<O3AConstraint>(o3aConstraint));
85  std::sort(d_o3aConstraintVect.begin(),
86  d_o3aConstraintVect.end(), d_compareO3AConstraint);
87  ++d_count;
88  }
89  std::vector<boost::shared_ptr<O3AConstraint> >::size_type size() {
90  return d_o3aConstraintVect.size();
91  }
92  O3AConstraint *operator[](unsigned int i) {
93  return d_o3aConstraintVect[i].get();
94  }
95  private:
96  unsigned int d_count;
97  std::vector<boost::shared_ptr<O3AConstraint> > d_o3aConstraintVect;
98  static bool d_compareO3AConstraint(boost::shared_ptr<O3AConstraint> a, boost::shared_ptr<O3AConstraint> b)
99  {
100  return ((a->d_prbIdx != b->d_prbIdx) ? (a->d_prbIdx < b->d_prbIdx)
101  : ((a->d_refIdx != b->d_refIdx) ? (a->d_refIdx < b->d_refIdx)
102  : ((a->d_weight != b->d_weight) ? (a->d_weight > b->d_weight)
103  : (a->d_idx < b->d_idx))));
104  };
105  };
106 
107  const int O3_DUMMY_COST = 100000;
108  const int O3_LARGE_NEGATIVE_WEIGHT = -1e9;
109  const int O3_DEFAULT_CONSTRAINT_WEIGHT = 100.0;
110  const unsigned int O3_MAX_H_BINS = 20;
111  const unsigned int O3_MAX_SDM_ITERATIONS = 100;
112  const unsigned int O3_MAX_SDM_THRESHOLD_ITER = 3;
113  const double O3_RANDOM_TRANS_COEFF = 5.0;
114  const double O3_THRESHOLD_DIFF_DISTANCE = 0.1;
115  const double O3_SDM_THRESHOLD_START = 0.7;
116  const double O3_SDM_THRESHOLD_STEP = 0.3;
117  const double O3_CHARGE_WEIGHT = 10.0;
118  const double O3_CRIPPEN_WEIGHT = 10.0;
119  const double O3_RMSD_THRESHOLD = 1.0e-04;
120  const double O3_SCORE_THRESHOLD = 0.01;
121  const double O3_SCORING_FUNCTION_ALPHA = 5.0;
122  const double O3_SCORING_FUNCTION_BETA = 0.5;
123  const double O3_CHARGE_COEFF = 5.0;
124  const double O3_CRIPPEN_COEFF = 1.0;
125  const int O3_MAX_WEIGHT_COEFF = 5;
126  enum {
128  O3_ACCURACY_MASK = (1<<0 | 1<<1),
129  O3_LOCAL_ONLY = (1<<2)
130  };
131 
132  class MolHistogram {
133  public:
134  MolHistogram(const ROMol &mol, const double *dmat);
136  inline int get(const unsigned int y, const unsigned int x) const {
137  PRECONDITION(y < d_h.shape()[0], "Invalid index on MolHistogram");
138  PRECONDITION(x < d_h.shape()[1], "Invalid index on MolHistogram");
139  return d_h[y][x];
140  }
141  private:
142  boost::multi_array<int, 2> d_h;
143  };
144 
145  class LAP {
146  public:
147  LAP(unsigned int dim) :
148  d_rowSol(dim),
149  d_colSol(dim),
150  d_free(dim),
151  d_colList(dim),
152  d_matches(dim),
153  d_d(dim),
154  d_v(dim),
155  d_pred(dim),
156  d_cost(boost::extents[dim][dim]) {};
157  ~LAP() {};
158  int getCost(const unsigned int i, const unsigned int j) {
159  PRECONDITION(i < d_cost.shape()[0], "Invalid index on LAP.cost");
160  PRECONDITION(j < d_cost.shape()[1], "Invalid index on LAP.cost");
161  return d_cost[i][j];
162  }
163  int getRowSol(const unsigned int i) {
164  PRECONDITION(i < d_rowSol.size(), "Invalid index on LAP.rowSol");
165  return d_rowSol[i];
166  }
167  void computeMinCostPath(const int dim);
168  void computeCostMatrix(const ROMol &prbMol, const MolHistogram &prbHist,
169  const ROMol &refMol, const MolHistogram &refHist,
170  O3AConstraintVect *o3aConstraintVect, int (*costFunc)
171  (const unsigned int, const unsigned int, double, void *),
172  void *data, const unsigned int n_bins = O3_MAX_H_BINS);
173  private:
174  std::vector<int> d_rowSol;
175  std::vector<int> d_colSol;
176  std::vector<int> d_free;
177  std::vector<int> d_colList;
178  std::vector<int> d_matches;
179  std::vector<int> d_d;
180  std::vector<int> d_v;
181  std::vector<int> d_pred;
182  boost::multi_array<int, 2> d_cost;
183  };
184 
185  class SDM {
186  public:
187  // constructor
188  SDM(const Conformer *prbConf = NULL, const Conformer *refConf = NULL,
189  O3AConstraintVect *o3aConstraintVect = NULL) :
190  d_prbConf(prbConf),
191  d_refConf(refConf),
192  d_o3aConstraintVect(o3aConstraintVect) {};
193  // copy constructor
194  SDM(const SDM &other) :
195  d_prbConf(other.d_prbConf),
196  d_refConf(other.d_refConf),
197  d_o3aConstraintVect(other.d_o3aConstraintVect),
198  d_SDMPtrVect(other.d_SDMPtrVect.size()) {
199  for (unsigned int i = 0; i < d_SDMPtrVect.size(); ++i) {
200  d_SDMPtrVect[i] = boost::shared_ptr<SDMElement>(new SDMElement());
201  memcpy(d_SDMPtrVect[i].get(), other.d_SDMPtrVect[i].get(), sizeof(SDMElement));
202  }
203  };
204  // assignment operator
205  SDM& operator=(const SDM &other) {
206  d_prbConf = other.d_prbConf;
207  d_refConf = other.d_refConf;
208  d_o3aConstraintVect = other.d_o3aConstraintVect;
209  d_SDMPtrVect.resize(other.d_SDMPtrVect.size());
210  for (unsigned int i = 0; i < d_SDMPtrVect.size(); ++i) {
211  d_SDMPtrVect[i] = boost::shared_ptr<SDMElement>(new SDMElement());
212  memcpy(d_SDMPtrVect[i].get(), other.d_SDMPtrVect[i].get(), sizeof(SDMElement));
213  }
214 
215  return *this;
216  };
217  // destructor
218  ~SDM() {};
219  void fillFromDist(double threshold,
220  const boost::dynamic_bitset<> &refHvyAtoms,
221  const boost::dynamic_bitset<> &prbHvyAtoms);
222  void fillFromLAP(LAP &lap);
223  double scoreAlignment(double (*scoringFunc)
224  (const unsigned int, const unsigned int, void *), void *data);
225  void prepareMatchWeightsVect(RDKit::MatchVectType &matchVect,
226  RDNumeric::DoubleVector &weights, double (*weightFunc)
227  (const unsigned int, const unsigned int, void *), void *data);
228  unsigned int size() {
229  return d_SDMPtrVect.size();
230  }
231  private:
232  typedef struct SDMElement {
233  unsigned int idx[2];
234  int score;
235  int cost;
236  double sqDist;
237  O3AConstraint *o3aConstraint;
238  } SDMElement;
239  const Conformer *d_prbConf;
240  const Conformer *d_refConf;
241  O3AConstraintVect *d_o3aConstraintVect;
242  std::vector<boost::shared_ptr<SDMElement> > d_SDMPtrVect;
243  static bool compareSDMScore(boost::shared_ptr<SDMElement> a, boost::shared_ptr<SDMElement> b)
244  {
245  return ((a->score != b->score) ? (a->score < b->score)
246  : ((a->cost != b->cost) ? (a->cost < b->cost)
247  : ((a->idx[0] != b->idx[0]) ? (a->idx[0] < b->idx[0])
248  : (a->idx[1] < b->idx[1]))));
249  };
250  static bool compareSDMDist(boost::shared_ptr<SDMElement> a, boost::shared_ptr<SDMElement> b)
251  {
252  double aWeight = (a->o3aConstraint ? a->o3aConstraint->getWeight() : 0.0);
253  double bWeight = (b->o3aConstraint ? b->o3aConstraint->getWeight() : 0.0);
254  return ((aWeight != bWeight) ? (aWeight > bWeight)
255  : ((a->sqDist != b->sqDist) ? (a->sqDist < b->sqDist)
256  : ((a->idx[0] != b->idx[0]) ? (a->idx[0] < b->idx[0])
257  : (a->idx[1] < b->idx[1]))));
258  };
259  };
260 
261  class O3A {
262  public:
263  //! pre-defined atom typing schemes
264  typedef enum {
265  MMFF94=0,
266  CRIPPEN
267  } AtomTypeScheme;
268  O3A(ROMol &prbMol, const ROMol &refMol,
269  void *prbProp, void *refProp, AtomTypeScheme atomTypes = MMFF94,
270  const int prbCid = -1, const int refCid = -1,
271  const bool reflect = false, const unsigned int maxIters = 50,
272  unsigned int options = 0, const MatchVectType *constraintMap = NULL,
273  const RDNumeric::DoubleVector *constraintWeights = NULL, LAP *extLAP = NULL,
274  MolHistogram *extPrbHist = NULL, MolHistogram *extRefHist = NULL);
275  O3A(int (*costFunc)(const unsigned int, const unsigned int, double, void *),
276  double (*weightFunc)(const unsigned int, const unsigned int, void *),
277  double (*scoringFunc)(const unsigned int, const unsigned int, void *),
278  void *data, ROMol &prbMol, const ROMol &refMol,
279  const int prbCid, const int refCid,
280  boost::dynamic_bitset<> *prbHvyAtoms = NULL,
281  boost::dynamic_bitset<> *refHvyAtoms = NULL,
282  const bool reflect = false, const unsigned int maxIters = 50,
283  unsigned int options = 0, O3AConstraintVect *o3aConstraintVect = NULL,
284  ROMol *extWorkPrbMol = NULL, LAP *extLAP = NULL,
285  MolHistogram *extPrbHist = NULL, MolHistogram *extRefHist = NULL);
286  ~O3A() {
287  if (d_o3aMatchVect) {
288  delete d_o3aMatchVect;
289  }
290  if (d_o3aWeights) {
291  delete d_o3aWeights;
292  }
293  };
294  double align();
295  double trans(RDGeom::Transform3D &trans);
296  double score() {
297  return d_o3aScore;
298  };
300  return d_o3aMatchVect;
301  };
303  return d_o3aWeights;
304  };
305  private:
306  ROMol *d_prbMol;
307  const ROMol *d_refMol;
308  int d_prbCid;
309  int d_refCid;
310  bool d_reflect;
311  unsigned int d_maxIters;
312  const RDKit::MatchVectType *d_o3aMatchVect;
313  const RDNumeric::DoubleVector *d_o3aWeights;
314  double d_o3aScore;
315  };
316 
317  void randomTransform(ROMol &mol, const int cid = -1, const int seed = -1);
318  const RDGeom::POINT3D_VECT *reflect(const Conformer &conf);
319  int o3aMMFFCostFunc(const unsigned int prbIdx, const unsigned int refIdx, double hSum, void *data);
320  double o3aMMFFWeightFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data);
321  double o3aMMFFScoringFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data);
322  int o3aCrippenCostFunc(const unsigned int prbIdx, const unsigned int refIdx, double hSum, void *data);
323  double o3aCrippenWeightFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data);
324  double o3aCrippenScoringFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data);
325 
326  void getO3AForProbeConfs(ROMol &prbMol, const ROMol &refMol,
327  void *prbProp, void *refProp,
328  std::vector<boost::shared_ptr<O3A> > &res,
329  unsigned int numThreads=1,
330  O3A::AtomTypeScheme atomTypes = O3A::MMFF94,
331  const int refCid = -1,
332  const bool reflect = false, const unsigned int maxIters = 50,
333  unsigned int options = 0, const MatchVectType *constraintMap = NULL,
334  const RDNumeric::DoubleVector *constraintWeights = NULL);
335 
336  }
337 }
338 #endif
const double O3_SDM_THRESHOLD_START
std::vector< boost::shared_ptr< O3AConstraint > >::size_type size()
SDM(const SDM &other)
Definition: RDLog.h:16
const double O3_THRESHOLD_DIFF_DISTANCE
const double O3_SCORE_THRESHOLD
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx) ...
const double O3_CHARGE_WEIGHT
O3AConstraint * operator[](unsigned int i)
Defines the primary molecule class ROMol as well as associated typedefs.
const RDGeom::POINT3D_VECT * reflect(const Conformer &conf)
void append(unsigned int prbIdx, unsigned int refIdx, double weight)
double o3aCrippenScoringFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data)
std::vector< Point3D > POINT3D_VECT
Definition: point.h:529
int o3aMMFFCostFunc(const unsigned int prbIdx, const unsigned int refIdx, double hSum, void *data)
SDM & operator=(const SDM &other)
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:105
int o3aCrippenCostFunc(const unsigned int prbIdx, const unsigned int refIdx, double hSum, void *data)
const double O3_RANDOM_TRANS_COEFF
void randomTransform(ROMol &mol, const int cid=-1, const int seed=-1)
const double O3_CHARGE_COEFF
const unsigned int O3_MAX_SDM_ITERATIONS
const unsigned int O3_MAX_H_BINS
LAP(unsigned int dim)
const RDKit::MatchVectType * matches()
const double O3_CRIPPEN_WEIGHT
const unsigned int O3_MAX_SDM_THRESHOLD_ITER
const RDNumeric::DoubleVector * weights()
const double O3_SCORING_FUNCTION_BETA
int getRowSol(const unsigned int i)
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
const double O3_SDM_THRESHOLD_STEP
const double O3_RMSD_THRESHOLD
bool isDoubleZero(const double x)
AtomTypeScheme
pre-defined atom typing schemes
const int O3_MAX_WEIGHT_COEFF
SDM(const Conformer *prbConf=NULL, const Conformer *refConf=NULL, O3AConstraintVect *o3aConstraintVect=NULL)
The class for representing 2D or 3D conformation of a molecule.
Definition: Conformer.h:41
const int O3_DEFAULT_CONSTRAINT_WEIGHT
#define PRECONDITION(expr, mess)
Definition: Invariant.h:119
const int O3_LARGE_NEGATIVE_WEIGHT
struct RDKit::MolAlign::O3AFuncData O3AFuncData
double o3aMMFFWeightFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data)
const double O3_SCORING_FUNCTION_ALPHA
const double O3_CRIPPEN_COEFF
void getO3AForProbeConfs(ROMol &prbMol, const ROMol &refMol, void *prbProp, void *refProp, std::vector< boost::shared_ptr< O3A > > &res, unsigned int numThreads=1, O3A::AtomTypeScheme atomTypes=O3A::MMFF94, const int refCid=-1, const bool reflect=false, const unsigned int maxIters=50, unsigned int options=0, const MatchVectType *constraintMap=NULL, const RDNumeric::DoubleVector *constraintWeights=NULL)
double o3aCrippenWeightFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data)
A class to represent vectors of numbers.
Definition: Vector.h:28
double o3aMMFFScoringFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data)
int getCost(const unsigned int i, const unsigned int j)