BFGSOpt.h (RDKit)
//
//  Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <math.h>
#include <RDGeneral/Invariant.h>
#include <cstring>
#include <algorithm>

namespace BFGSOpt {
  const double FUNCTOL = 1e-4;   //!< Default tolerance for function convergence in the minimizer
  const double MOVETOL = 1e-7;   //!< Default tolerance for x changes in the minimizer
  const int MAXITS = 200;        //!< Default maximum number of iterations
  const double EPS = 3e-8;       //!< Default gradient tolerance in the minimizer
  const double TOLX = 4. * EPS;  //!< Default direction vector tolerance in the minimizer
  const double MAXSTEP = 100.0;  //!< Default maximum step size in the minimizer

  //! Do a Quasi-Newton minimization along a line.
  /*!
    See Numerical Recipes in C, Section 9.7 for a description of the algorithm.

    \param dim     the dimensionality of the space.
    \param oldPt   the current position, as an array.
    \param oldVal  the current function value.
    \param grad    the value of the function gradient at oldPt
    \param dir     the minimization direction
    \param newPt   used to return the final position
    \param newVal  used to return the final function value
    \param func    the function to minimize
    \param maxStep the maximum allowable step size
    \param resCode used to return the results of the search.

    Possible values for resCode on return are:
     - 0: success
     - 1: the step size got too small. This probably indicates success.
     - -1: the direction is bad (orthogonal to the gradient)
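
    A step of length \f$\lambda\f$ along dir is accepted when it satisfies
    the sufficient-decrease (Armijo) condition
    \f$f(x + \lambda d) \le f(x) + \mathrm{FUNCTOL}\,\lambda\,\nabla f \cdot d\f$;
    otherwise \f$\lambda\f$ is reduced by backtracking.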
  */
  template <typename EnergyFunctor>
  void linearSearch(unsigned int dim, double *oldPt, double oldVal,
                    double *grad, double *dir, double *newPt,
                    double &newVal,
                    EnergyFunctor func,
                    double maxStep, int &resCode) {
    PRECONDITION(oldPt, "bad input array");
    PRECONDITION(grad, "bad input array");
    PRECONDITION(dir, "bad input array");
    PRECONDITION(newPt, "bad input array");

    const unsigned int MAX_ITER_LINEAR_SEARCH = 1000;
    double sum = 0.0, slope = 0.0, test = 0.0, lambda = 0.0;
    double lambda2 = 0.0, lambdaMin = 0.0, tmpLambda = 0.0, val2 = 0.0;

    resCode = -1;

    // get the length of the direction vector:
    sum = 0.0;
    for (unsigned int i = 0; i < dim; i++) sum += dir[i] * dir[i];
    sum = sqrt(sum);

    // rescale if we're trying to move too far:
    if (sum > maxStep) {
      for (unsigned int i = 0; i < dim; i++) dir[i] *= maxStep / sum;
    }

    // make sure our direction has at least some component along
    // -grad
    slope = 0.0;
    for (unsigned int i = 0; i < dim; i++) {
      slope += dir[i] * grad[i];
    }
    if (slope >= 0.0) {
      return;
    }

    test = 0.0;
    for (unsigned int i = 0; i < dim; i++) {
      double temp = fabs(dir[i]) / std::max(fabs(oldPt[i]), 1.0);
      if (temp > test) test = temp;
    }

    lambdaMin = MOVETOL / test;
    lambda = 1.0;
    unsigned int it = 0;
    while (it < MAX_ITER_LINEAR_SEARCH) {
      if (lambda < lambdaMin) {
        // the position change is too small
        resCode = 1;
        break;
      }
      for (unsigned int i = 0; i < dim; i++) {
        newPt[i] = oldPt[i] + lambda * dir[i];
      }
      newVal = func(newPt);

      if (newVal - oldVal <= FUNCTOL * lambda * slope) {
        // we're converged on the function:
        resCode = 0;
        return;
      }
      // if we made it this far, we need to backtrack:
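      // Model f along the line as a polynomial in lambda (Numerical Recipes
      // in C, Section 9.7): the first backtrack minimizes a quadratic fit
      // through f(0), f'(0) = slope and the rejected f(lambda); later
      // backtracks fit a cubic through the two most recent (lambda, f)
      // pairs. The new lambda is constrained to be at least 0.1 of the old
      // value and, after the first step, at most 0.5 of it.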
      if (it == 0) {
        // it's the first step:
        tmpLambda = -slope / (2.0 * (newVal - oldVal - slope));
      } else {
        double rhs1 = newVal - oldVal - lambda * slope;
        double rhs2 = val2 - oldVal - lambda2 * slope;
        double a = (rhs1 / (lambda * lambda) - rhs2 / (lambda2 * lambda2)) /
                   (lambda - lambda2);
        double b = (-lambda2 * rhs1 / (lambda * lambda) +
                    lambda * rhs2 / (lambda2 * lambda2)) /
                   (lambda - lambda2);
        if (a == 0.0) {
          tmpLambda = -slope / (2.0 * b);
        } else {
          double disc = b * b - 3 * a * slope;
          if (disc < 0.0) {
            tmpLambda = 0.5 * lambda;
          } else if (b <= 0.0) {
            tmpLambda = (-b + sqrt(disc)) / (3.0 * a);
          } else {
            tmpLambda = -slope / (b + sqrt(disc));
          }
        }
        if (tmpLambda > 0.5 * lambda) {
          tmpLambda = 0.5 * lambda;
        }
      }
      lambda2 = lambda;
      val2 = newVal;
      lambda = std::max(tmpLambda, 0.1 * lambda);
      ++it;
    }
    // no acceptable step was found: return the starting point
    for (unsigned int i = 0; i < dim; i++) {
      newPt[i] = oldPt[i];
    }
  }

#define CLEANUP()                                      \
  {                                                    \
    delete[] grad; delete[] dGrad; delete[] hessDGrad; \
    delete[] newPos; delete[] xi; delete[] invHessian; \
  }
  //! Do a BFGS minimization of a function.
  /*!
    See Numerical Recipes in C, Section 10.7 for a description of the algorithm.

    \param dim      the dimensionality of the space.
    \param pos      the starting position, as an array.
    \param gradTol  tolerance for gradient convergence
    \param numIters used to return the number of iterations required
    \param funcVal  used to return the final function value
    \param func     the function to minimize
    \param gradFunc calculates the gradient of func
    \param funcTol  tolerance for changes in the function value for convergence.
    \param maxIts   maximum number of iterations allowed

    \return a flag indicating success (or type of failure). Possible values are:
     - 0: success
     - 1: too many iterations were required
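
    A minimal usage sketch (the quadratic functors here are hypothetical, not
    part of this header):
    \code
    double quadFunc(double *pos) { return pos[0] * pos[0] + pos[1] * pos[1]; }
    double quadGrad(double *pos, double *grad) {
      grad[0] = 2.0 * pos[0];
      grad[1] = 2.0 * pos[1];
      return 1.0;  // scale factor applied to the gradient convergence test
    }

    double pos[2] = {1.0, -2.0};
    unsigned int numIters;
    double funcVal;
    int res = BFGSOpt::minimize(2, pos, 1e-4, numIters, funcVal,
                                quadFunc, quadGrad);
    // res == 0 on success; pos holds the minimum and funcVal the value there.
    \endcode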
  */
  template <typename EnergyFunctor, typename GradientFunctor>
  int minimize(unsigned int dim, double *pos,
               double gradTol,
               unsigned int &numIters,
               double &funcVal,
               EnergyFunctor func,
               GradientFunctor gradFunc,
               double funcTol = TOLX,
               unsigned int maxIts = MAXITS) {
    PRECONDITION(pos, "bad input array");
    PRECONDITION(gradTol > 0, "bad tolerance");

    double sum, maxStep, fp;

    double *grad, *dGrad, *hessDGrad;
    double *newPos, *xi;
    double *invHessian;

    grad = new double[dim];
    dGrad = new double[dim];
    hessDGrad = new double[dim];
    newPos = new double[dim];
    xi = new double[dim];
    invHessian = new double[dim * dim];

    // evaluate the function and gradient in our current position:
    fp = func(pos);
    gradFunc(pos, grad);

    sum = 0.0;
    memset(invHessian, 0, dim * dim * sizeof(double));
    for (unsigned int i = 0; i < dim; i++) {
      unsigned int itab = i * dim;
      // initialize the inverse hessian to be identity:
      invHessian[itab + i] = 1.0;
      // the first line dir is -grad:
      xi[i] = -grad[i];
      sum += pos[i] * pos[i];
    }
    // pick a max step size:
    maxStep = MAXSTEP * std::max(sqrt(sum), static_cast<double>(dim));

    for (unsigned int iter = 1; iter <= maxIts; iter++) {
      numIters = iter;
      int status;

      // do the line search:
      linearSearch(dim, pos, fp, grad, xi, newPos, funcVal, func, maxStep,
                   status);
      if (status < 0) {
        // free the work arrays before the invariant check throws:
        CLEANUP();
      }
      CHECK_INVARIANT(status >= 0, "bad direction in linearSearch");

      // save the function value for the next search:
      fp = funcVal;

      // set the direction of this line and save the gradient:
      double test = 0.0;
      for (unsigned int i = 0; i < dim; i++) {
        xi[i] = newPos[i] - pos[i];
        pos[i] = newPos[i];
        double temp = fabs(xi[i]) / std::max(fabs(pos[i]), 1.0);
        if (temp > test) test = temp;
        dGrad[i] = grad[i];
      }
      // converged on the position change?
      if (test < TOLX) {
        CLEANUP();
        return 0;
      }

      // update the gradient:
      double gradScale = gradFunc(pos, grad);

      // is the gradient converged?
      test = 0.0;
      double term = std::max(funcVal * gradScale, 1.0);
      for (unsigned int i = 0; i < dim; i++) {
        double temp = fabs(grad[i]) * std::max(fabs(pos[i]), 1.0);
        test = std::max(test, temp);
        // figure out how much the gradient changed:
        dGrad[i] = grad[i] - dGrad[i];
      }
      test /= term;
      if (test < gradTol) {
        CLEANUP();
        return 0;
      }

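      // BFGS update of the inverse Hessian (Numerical Recipes in C,
      // Section 10.7):
      //   H += (s s^T)/(s.y) - (H y y^T H)/(y.H.y) + (y.H.y) u u^T
      // where s = xi (the step just taken), y = dGrad (the change in the
      // gradient) and u = s/(s.y) - H y/(y.H.y); dGrad is reused below to
      // hold u. The update is skipped unless s.y is sufficiently positive.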
      // compute hessian*dGrad:
      double fac = 0, fae = 0, sumDGrad = 0, sumXi = 0;
      for (unsigned int i = 0; i < dim; i++) {
        unsigned int itab = i * dim;
        hessDGrad[i] = 0.0;
        for (unsigned int j = 0; j < dim; j++) {
          hessDGrad[i] += invHessian[itab + j] * dGrad[j];
        }

        fac += dGrad[i] * xi[i];
        fae += dGrad[i] * hessDGrad[i];
        sumDGrad += dGrad[i] * dGrad[i];
        sumXi += xi[i] * xi[i];
      }
      if (fac > sqrt(EPS * sumDGrad * sumXi)) {
        fac = 1.0 / fac;
        double fad = 1.0 / fae;
        for (unsigned int i = 0; i < dim; i++) {
          dGrad[i] = fac * xi[i] - fad * hessDGrad[i];
        }
        for (unsigned int i = 0; i < dim; i++) {
          unsigned int itab = i * dim;
          for (unsigned int j = i; j < dim; j++) {
            invHessian[itab + j] += fac * xi[i] * xi[j] -
                                    fad * hessDGrad[i] * hessDGrad[j] +
                                    fae * dGrad[i] * dGrad[j];
            // keep the inverse hessian symmetric:
            invHessian[j * dim + i] = invHessian[itab + j];
          }
        }
      }
      // generate the next direction to move:
      for (unsigned int i = 0; i < dim; i++) {
        unsigned int itab = i * dim;
        xi[i] = 0.0;
        for (unsigned int j = 0; j < dim; j++) {
          xi[i] -= invHessian[itab + j] * grad[j];
        }
      }
    }
    CLEANUP();
    return 1;
  }
}  // namespace BFGSOpt
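
A self-contained sketch of how this header might be driven, minimizing the
Rosenbrock function. The driver below is illustrative, not part of RDKit;
only the BFGSOpt::minimize call and the functor signatures come from the
header above, and the build setup (linking against the RDKit RDGeneral
library for the invariant macros) is assumed:

#include <cstdio>
#include "BFGSOpt.h"

// Rosenbrock function: f(x, y) = (1-x)^2 + 100 (y - x^2)^2, minimum 0 at (1, 1).
static double rosen(double *p) {
  double a = 1.0 - p[0];
  double b = p[1] - p[0] * p[0];
  return a * a + 100.0 * b * b;
}
// Analytic gradient of the Rosenbrock function; the return value is the
// scale factor applied in minimize()'s gradient-convergence test.
static double rosenGrad(double *p, double *g) {
  double b = p[1] - p[0] * p[0];
  g[0] = -2.0 * (1.0 - p[0]) - 400.0 * p[0] * b;
  g[1] = 200.0 * b;
  return 1.0;
}

int main() {
  double pos[2] = {-1.2, 1.0};  // the standard hard starting point
  unsigned int numIters = 0;
  double funcVal = 0.0;
  int res = BFGSOpt::minimize(2, pos, 1e-6, numIters, funcVal, rosen,
                              rosenGrad);
  printf("res=%d iters=%u f=%g at (%g, %g)\n", res, numIters, funcVal,
         pos[0], pos[1]);
  return 0;
}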