RDKit
Open-source cheminformatics and machine learning.
new_canon.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Greg Landrum
3 // Adapted from pseudo-code from Roger Sayle
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 
12 #include <RDGeneral/hanoiSort.h>
13 #include <GraphMol/ROMol.h>
14 #include <GraphMol/RingInfo.h>
15 #include <boost/cstdint.hpp>
16 #include <boost/foreach.hpp>
17 #include <boost/dynamic_bitset.hpp>
18 #include <cstring>
19 #include <iostream>
20 #include <cassert>
21 #include <cstring>
22 #include <vector>
23 
24 //#define VERBOSE_CANON 1
25 
26 namespace RDKit {
27  namespace Canon{
28 
29  struct bondholder {
31  unsigned int bondStereo;
32  unsigned int nbrSymClass;
33  unsigned int nbrIdx;
34  bondholder() : bondType(Bond::UNSPECIFIED),
35  bondStereo(static_cast<unsigned int>(Bond::STEREONONE)),
36  nbrIdx(0), nbrSymClass(0){};
37  bondholder(Bond::BondType bt,Bond::BondStereo bs,unsigned int ni,unsigned int nsc) :
38  bondType(bt),
39  bondStereo(static_cast<unsigned int>(bs)),
40  nbrIdx(ni),
41  nbrSymClass(nsc){};
42  bondholder(Bond::BondType bt,unsigned int bs,unsigned int ni,unsigned int nsc) :
43  bondType(bt),
44  bondStereo(bs),
45  nbrIdx(ni),
46  nbrSymClass(nsc){};
47  bool operator<(const bondholder &o) const {
48  if(bondType!=o.bondType)
49  return bondType<o.bondType;
50  if(bondStereo!=o.bondStereo)
51  return bondStereo<o.bondStereo;
52  return nbrSymClass<o.nbrSymClass;
53  }
54  static bool greater(const bondholder &lhs,const bondholder &rhs){
55  if(lhs.bondType!=rhs.bondType)
56  return lhs.bondType>rhs.bondType;
57  if(lhs.bondStereo!=rhs.bondStereo)
58  return lhs.bondStereo>rhs.bondStereo;
59  return lhs.nbrSymClass>rhs.nbrSymClass;
60  }
61 
62  static int compare(const bondholder &x,const bondholder &y,unsigned int div=1){
63  if(x.bondType<y.bondType)
64  return -1;
65  else if(x.bondType>y.bondType)
66  return 1;
67  if(x.bondStereo<y.bondStereo)
68  return -1;
69  else if(x.bondStereo>y.bondStereo)
70  return 1;
71  return x.nbrSymClass/div-y.nbrSymClass/div;
72  }
73  };
74 
75  struct canon_atom{
76  const Atom *atom;
77  int index;
78  unsigned int degree;
79  unsigned int totalNumHs;
80  bool hasRingNbr;
82  int* nbrIds;
83  const std::string *p_symbol; // if provided, this is used to order atoms
84  unsigned int neighborNum;
85  unsigned int revistedNeighbors;
86  std::vector<bondholder> bonds;
87 
88  canon_atom() : atom(NULL),index(-1),degree(0),totalNumHs(0),
89  hasRingNbr(false), isRingStereoAtom(false), nbrIds(NULL),
90  p_symbol(NULL), neighborNum(0), revistedNeighbors(0) {};
91  };
92 
93  void updateAtomNeighborIndex(canon_atom* atoms, std::vector<bondholder> &nbrs);
94 
95  void updateAtomNeighborNumSwaps(canon_atom* atoms, std::vector<bondholder> &nbrs,
96  unsigned int atomIdx, std::vector<std::pair<unsigned int, unsigned int> >& result);
97 
98  /*
99  * Different types of atom compare functions:
100  *
101  * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting dependent chirality
102  * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing highly symmetrical graphs/molecules
103  * - AtomCompareFunctor: Basic atom compare function which also allows to include neighbors within the ranking
104  */
105 
107 
108  public:
110  const ROMol *dp_mol;
111  const boost::dynamic_bitset<> *dp_atomsInPlay,*dp_bondsInPlay;
112 
113  SpecialChiralityAtomCompareFunctor() : dp_atoms(NULL), dp_mol(NULL),
114  dp_atomsInPlay(NULL), dp_bondsInPlay(NULL) {
115  };
117  const boost::dynamic_bitset<> *atomsInPlay=NULL,
118  const boost::dynamic_bitset<> *bondsInPlay=NULL ) :
119  dp_atoms(atoms), dp_mol(&m),
120  dp_atomsInPlay(atomsInPlay),dp_bondsInPlay(bondsInPlay) {
121  };
122  int operator()(int i,int j) const {
123  PRECONDITION(dp_atoms,"no atoms");
124  PRECONDITION(dp_mol,"no molecule");
125  PRECONDITION(i!=j,"bad call");
126  if(dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])){
127  return 0;
128  }
129 
130  if((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay){
131  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
132  }
133  if((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay){
134  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
135  }
136  for(unsigned int ii=0;ii<dp_atoms[i].bonds.size() && ii<dp_atoms[j].bonds.size();++ii){
137  int cmp=bondholder::compare(dp_atoms[i].bonds[ii],dp_atoms[j].bonds[ii]);
138  if(cmp) return cmp;
139  }
140 
141  std::vector<std::pair<unsigned int, unsigned int> > swapsi;
142  std::vector<std::pair<unsigned int, unsigned int> > swapsj;
143  if((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay){
144  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds,i,swapsi);
145  }
146  if((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay){
147  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds,j,swapsj);
148  }
149  for(unsigned int ii=0;ii<swapsi.size() && ii<swapsj.size();++ii){
150  int cmp=swapsi[ii].second-swapsj[ii].second;
151  if(cmp) return cmp;
152  }
153  return 0;
154  }
155  };
156 
158 
159  public:
161  const ROMol *dp_mol;
162  const boost::dynamic_bitset<> *dp_atomsInPlay,*dp_bondsInPlay;
163 
164  SpecialSymmetryAtomCompareFunctor() : dp_atoms(NULL), dp_mol(NULL),
165  dp_atomsInPlay(NULL), dp_bondsInPlay(NULL) {
166  };
168  const boost::dynamic_bitset<> *atomsInPlay=NULL,
169  const boost::dynamic_bitset<> *bondsInPlay=NULL ) :
170  dp_atoms(atoms), dp_mol(&m),
171  dp_atomsInPlay(atomsInPlay),dp_bondsInPlay(bondsInPlay) {
172  };
173  int operator()(int i,int j) const {
174  PRECONDITION(dp_atoms,"no atoms");
175  PRECONDITION(dp_mol,"no molecule");
176  PRECONDITION(i!=j,"bad call");
177  if(dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])){
178  return 0;
179  }
180 
181  if(dp_atoms[i].neighborNum < dp_atoms[j].neighborNum){
182  return -1;
183  }
184  else if(dp_atoms[i].neighborNum > dp_atoms[j].neighborNum){
185  return 1;
186  }
187 
188  if(dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors){
189  return -1;
190  }
191  else if(dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors){
192  return 1;
193  }
194 
195  if((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay){
196  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
197  }
198  if((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay){
199  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
200  }
201  for(unsigned int ii=0;ii<dp_atoms[i].bonds.size() && ii<dp_atoms[j].bonds.size();++ii){
202  int cmp=bondholder::compare(dp_atoms[i].bonds[ii],dp_atoms[j].bonds[ii]);
203  if(cmp) return cmp;
204  }
205 
206  if(dp_atoms[i].bonds.size()<dp_atoms[j].bonds.size()){
207  return -1;
208  } else if(dp_atoms[i].bonds.size()>dp_atoms[j].bonds.size()) {
209  return 1;
210  }
211  return 0;
212  }
213  };
214 
216 
217  unsigned int getAtomRingNbrCode(unsigned int i) const {
218  if(!dp_atoms[i].hasRingNbr) return 0;
219 
220  const Atom *at=dp_atoms[i].atom;
221  int *nbrs = dp_atoms[i].nbrIds;
222  unsigned int code=0;
223  for(unsigned j=0; j<dp_atoms[i].degree; ++j){
224  if(dp_atoms[nbrs[j]].isRingStereoAtom){
225 
226  code += dp_atoms[nbrs[j]].index*10000+1;//j;
227  }
228  }
229  return code;
230  }
231 
232  int basecomp(int i,int j) const {
233  PRECONDITION(dp_atoms,"no atoms");
234  unsigned int ivi,ivj;
235 
236  // always start with the current class:
237  ivi= dp_atoms[i].index;
238  ivj= dp_atoms[j].index;
239  if(ivi<ivj)
240  return -1;
241  else if(ivi>ivj)
242  return 1;
243 
244  // use the atom-mapping numbers if they were assigned
245  std::string molAtomMapNumber_i="";
246  std::string molAtomMapNumber_j="";
247  dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,molAtomMapNumber_i);
248  dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,molAtomMapNumber_j);
249  if(molAtomMapNumber_i<molAtomMapNumber_j)
250  return -1;
251  else if(molAtomMapNumber_i>molAtomMapNumber_j)
252  return 1;
253 
254  // start by comparing degree
255  ivi= dp_atoms[i].degree;
256  ivj= dp_atoms[j].degree;
257  if(ivi<ivj)
258  return -1;
259  else if(ivi>ivj)
260  return 1;
261 
262  if(dp_atoms[i].p_symbol &&
263  dp_atoms[j].p_symbol ) {
264  if( *(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol) )
265  return -1;
266  else if( *(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol) )
267  return 1;
268  else
269  return 0;
270  }
271  // move onto atomic number
272  ivi= dp_atoms[i].atom->getAtomicNum();
273  ivj= dp_atoms[j].atom->getAtomicNum();
274  if(ivi<ivj)
275  return -1;
276  else if(ivi>ivj)
277  return 1;
278 
279  // isotopes if we're using them
280  if(df_useIsotopes){
281  ivi=dp_atoms[i].atom->getIsotope();
282  ivj=dp_atoms[j].atom->getIsotope();
283  if(ivi<ivj)
284  return -1;
285  else if(ivi>ivj)
286  return 1;
287  }
288 
289  // nHs
290  ivi=dp_atoms[i].totalNumHs;
291  ivj=dp_atoms[j].totalNumHs;
292  if(ivi<ivj)
293  return -1;
294  else if(ivi>ivj)
295  return 1;
296 
297  // charge
298  ivi=dp_atoms[i].atom->getFormalCharge();
299  ivj=dp_atoms[j].atom->getFormalCharge();
300  if(ivi<ivj)
301  return -1;
302  else if(ivi>ivj)
303  return 1;
304 
305  // chirality if we're using it
306  if(df_useChirality){
307  // first atom stereochem:
308  ivi=0;
309  ivj=0;
310  std::string cipCode;
311  if(dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,cipCode)){
312  ivi=cipCode=="R"?2:1;
313  }
314  if(dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,cipCode)){
315  ivj=cipCode=="R"?2:1;
316  }
317  if(ivi<ivj)
318  return -1;
319  else if(ivi>ivj)
320  return 1;
321 
322  // can't actually use values here, because they are arbitrary
323  ivi=dp_atoms[i].atom->getChiralTag()!=0;
324  ivj=dp_atoms[j].atom->getChiralTag()!=0;
325  if(ivi<ivj)
326  return -1;
327  else if(ivi>ivj)
328  return 1;
329 
330  }
331  if(df_useChiralityRings){
332  // ring stereochemistry
333  ivi=getAtomRingNbrCode(i);
334  ivj=getAtomRingNbrCode(j);
335  if(ivi<ivj)
336  return -1;
337  else if(ivi>ivj)
338  return 1;
339  // bond stereo is taken care of in the neighborhood comparison
340  }
341  return 0;
342  }
343  public:
345  const ROMol *dp_mol;
346  const boost::dynamic_bitset<> *dp_atomsInPlay,*dp_bondsInPlay;
351 
352  AtomCompareFunctor() : dp_atoms(NULL), dp_mol(NULL),
353  dp_atomsInPlay(NULL), dp_bondsInPlay(NULL),
354  df_useNbrs(false),
355  df_useIsotopes(true), df_useChirality(true), df_useChiralityRings(true) {
356  };
358  const boost::dynamic_bitset<> *atomsInPlay=NULL,
359  const boost::dynamic_bitset<> *bondsInPlay=NULL ) :
360  dp_atoms(atoms), dp_mol(&m),
361  dp_atomsInPlay(atomsInPlay),dp_bondsInPlay(bondsInPlay),
362  df_useNbrs(false),df_useIsotopes(true), df_useChirality(true), df_useChiralityRings(true) {
363  };
364  int operator()(int i,int j) const {
365  PRECONDITION(dp_atoms,"no atoms");
366  PRECONDITION(dp_mol,"no molecule");
367  PRECONDITION(i!=j,"bad call");
368  if(dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])){
369  return 0;
370  }
371 
372  int v=basecomp(i,j);
373  if(v){
374  return v;
375  }
376 
377  if(df_useNbrs){
378  if((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay){
379  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
380  }
381  if((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay){
382  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
383  }
384 
385  for(unsigned int ii=0;ii<dp_atoms[i].bonds.size() && ii<dp_atoms[j].bonds.size();++ii){
386  int cmp=bondholder::compare(dp_atoms[i].bonds[ii],dp_atoms[j].bonds[ii]);
387  if(cmp) return cmp;
388  }
389 
390  if(dp_atoms[i].bonds.size()<dp_atoms[j].bonds.size()){
391  return -1;
392  } else if(dp_atoms[i].bonds.size()>dp_atoms[j].bonds.size()) {
393  return 1;
394  }
395  }
396  return 0;
397  }
398  };
399 
400  /*
401  * A compare function to discriminate chiral atoms, similar to the CIP rules.
402  * This functionality is currently not used.
403  */
404 
405  const unsigned int ATNUM_CLASS_OFFSET=10000;
407  void getAtomNeighborhood(std::vector<bondholder> &nbrs) const{
408  for(unsigned j=0; j < nbrs.size(); ++j){
409  unsigned int nbrIdx = nbrs[j].nbrIdx;
410  if(nbrIdx == ATNUM_CLASS_OFFSET){
411  // Ignore the Hs
412  continue;
413  }
414  const Atom *nbr = dp_atoms[nbrIdx].atom;
415  nbrs[j].nbrSymClass = nbr->getAtomicNum()*ATNUM_CLASS_OFFSET+dp_atoms[nbrIdx].index+1;
416  }
417  std::sort(nbrs.begin(),nbrs.end(),bondholder::greater);
418  // FIX: don't want to be doing this long-term
419  }
420 
421  int basecomp(int i,int j) const {
422  PRECONDITION(dp_atoms,"no atoms");
423  unsigned int ivi,ivj;
424 
425  // always start with the current class:
426  ivi= dp_atoms[i].index;
427  ivj= dp_atoms[j].index;
428  if(ivi<ivj)
429  return -1;
430  else if(ivi>ivj)
431  return 1;
432 
433  // move onto atomic number
434  ivi= dp_atoms[i].atom->getAtomicNum();
435  ivj= dp_atoms[j].atom->getAtomicNum();
436  if(ivi<ivj)
437  return -1;
438  else if(ivi>ivj)
439  return 1;
440 
441  // isotopes:
442  ivi=dp_atoms[i].atom->getIsotope();
443  ivj=dp_atoms[j].atom->getIsotope();
444  if(ivi<ivj)
445  return -1;
446  else if(ivi>ivj)
447  return 1;
448 
449  // atom stereochem:
450  ivi=0;
451  ivj=0;
452  std::string cipCode;
453  if(dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,cipCode)){
454  ivi=cipCode=="R"?2:1;
455  }
456  if(dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,cipCode)){
457  ivj=cipCode=="R"?2:1;
458  }
459  if(ivi<ivj)
460  return -1;
461  else if(ivi>ivj)
462  return 1;
463 
464  // bond stereo is taken care of in the neighborhood comparison
465  return 0;
466  }
467  public:
469  const ROMol *dp_mol;
471  ChiralAtomCompareFunctor() : dp_atoms(NULL), dp_mol(NULL), df_useNbrs(false) {
472  };
473  ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m) : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false) {
474  };
475  int operator()(int i,int j) const {
476  PRECONDITION(dp_atoms,"no atoms");
477  PRECONDITION(dp_mol,"no molecule");
478  PRECONDITION(i!=j,"bad call");
479  int v=basecomp(i,j);
480  if(v) return v;
481 
482  if(df_useNbrs){
483  getAtomNeighborhood(dp_atoms[i].bonds);
484  getAtomNeighborhood(dp_atoms[j].bonds);
485 
486  // we do two passes through the neighbor lists. The first just uses the
487  // atomic numbers (by passing the optional 10000 to bondholder::compare),
488  // the second takes the already-computed index into account
489  for(unsigned int ii=0;ii<dp_atoms[i].bonds.size() && ii<dp_atoms[j].bonds.size();++ii){
490  int cmp=bondholder::compare(dp_atoms[i].bonds[ii],dp_atoms[j].bonds[ii],ATNUM_CLASS_OFFSET);
491  if(cmp) return cmp;
492  }
493  for(unsigned int ii=0;ii<dp_atoms[i].bonds.size() && ii<dp_atoms[j].bonds.size();++ii){
494  int cmp=bondholder::compare(dp_atoms[i].bonds[ii],dp_atoms[j].bonds[ii]);
495  if(cmp) return cmp;
496  }
497  if(dp_atoms[i].bonds.size()<dp_atoms[j].bonds.size()){
498  return -1;
499  } else if(dp_atoms[i].bonds.size()>dp_atoms[j].bonds.size()) {
500  return 1;
501  }
502  }
503  return 0;
504  }
505  };
506 
507  /*
508  * Basic canonicalization function to organize the partitions which will be sorted next.
509  * */
510 
511  template <typename CompareFunc>
512  void RefinePartitions(const ROMol &mol,
513  canon_atom *atoms,
514  CompareFunc compar, int mode,
515  int *order,
516  int *count,
517  int &activeset, int *next,
518  int *changed,
519  char *touchedPartitions){
520  unsigned int nAtoms=mol.getNumAtoms();
521  register int partition;
522  register int symclass;
523  register int *start;
524  register int offset;
525  register int index;
526  register int len;
527  register int i;
528  //std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
529 
530  // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
531  while( activeset != -1 ) {
532  //std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
533  // std::cerr<<" next: ";
534  // for(unsigned int ii=0;ii<nAtoms;++ii){
535  // std::cerr<<ii<<":"<<next[ii]<<" ";
536  // }
537  // std::cerr<<std::endl;
538  // for(unsigned int ii=0;ii<nAtoms;++ii){
539  // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index: "<<atoms[order[ii]].index<<std::endl;
540  // }
541 
542  partition = activeset;
543  activeset = next[partition];
544  next[partition] = -2;
545 
546  len = count[partition];
547  offset = atoms[partition].index;
548  start = order+offset;
549  // std::cerr<<"\n\n**************************************************************"<<std::endl;
550  // std::cerr<<" sort - class:"<<atoms[partition].index<<" len: "<<len<<":";
551  // for(unsigned int ii=0;ii<len;++ii){
552  // std::cerr<<" "<<order[offset+ii]+1;
553  // }
554  // std::cerr<<std::endl;
555  // for(unsigned int ii=0;ii<nAtoms;++ii){
556  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index: "<<atoms[order[ii]].index<<std::endl;
557  // }
558  hanoisort(start,len,count,changed, compar);
559  // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
560  // std::cerr<<" result:";
561  // for(unsigned int ii=0;ii<nAtoms;++ii){
562  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index: "<<atoms[order[ii]].index<<std::endl;
563  // }
564  for(unsigned k = 0; k < len; ++k){
565  changed[start[k]]=0;
566  }
567 
568  index = start[0];
569  // std::cerr<<" len:"<<len<<" index:"<<index<<" count:"<<count[index]<<std::endl;
570  for( i=count[index]; i<len; i++ ) {
571  index = start[i];
572  if( count[index] )
573  symclass = offset+i;
574  atoms[index].index = symclass;
575  // std::cerr<<" "<<index+1<<"("<<symclass<<")";
576  //if(mode && (activeset<0 || count[index]>count[activeset]) ){
577  // activeset=index;
578  //}
579  for(unsigned j = 0; j < atoms[index].degree; ++j){
580  changed[atoms[index].nbrIds[j]]=1;
581  }
582  }
583  // std::cerr<<std::endl;
584 
585  if( mode ) {
586  index=start[0];
587  for( i=count[index]; i<len; i++ ) {
588  index=start[i];
589  for(unsigned j = 0; j < atoms[index].degree; ++j){
590  unsigned int nbor = atoms[index].nbrIds[j];
591  touchedPartitions[atoms[nbor].index]=1;
592  }
593  }
594  for(unsigned int ii=0; ii<nAtoms; ++ii) {
595  if(touchedPartitions[ii]){
596  partition = order[ii];
597  if( (count[partition]>1) &&
598  (next[partition]==-2) ) {
599  next[partition] = activeset;
600  activeset = partition;
601  }
602  touchedPartitions[ii] = 0;
603  }
604  }
605  }
606  }
607  } // end of RefinePartitions()
608 
609 
610 
611  template <typename CompareFunc>
612  void BreakTies(const ROMol &mol,
613  canon_atom *atoms,
614  CompareFunc compar, int mode,
615  int *order,
616  int *count,
617  int &activeset, int *next,
618  int *changed,
619  char *touchedPartitions){
620  unsigned int nAtoms=mol.getNumAtoms();
621  register int partition;
622  register int offset;
623  register int index;
624  register int len;
625  int oldPart = 0;
626 
627  for(unsigned int i=0; i<nAtoms; i++ ) {
628  partition = order[i];
629  oldPart = atoms[partition].index;
630  while( count[partition] > 1 ) {
631  len = count[partition];
632  offset = atoms[partition].index+len-1;
633  index = order[offset];
634  atoms[index].index = offset;
635  count[partition] = len-1;
636  count[index] = 1;
637 
638  //test for ions, water molecules with no
639  if(atoms[index].degree < 1){
640  continue;
641  }
642  for(unsigned j = 0; j < atoms[index].degree; ++j){
643  unsigned int nbor = atoms[index].nbrIds[j];
644  touchedPartitions[atoms[nbor].index]=1;
645  changed[nbor]=1;
646  }
647 
648  for(unsigned int ii=0; ii<nAtoms; ++ii) {
649  if(touchedPartitions[ii]){
650  int npart = order[ii];
651  if( (count[npart]>1) &&
652  (next[npart]==-2) ) {
653  next[npart] = activeset;
654  activeset = npart;
655  }
656  touchedPartitions[ii] = 0;
657  }
658  }
659  RefinePartitions(mol,atoms,compar,mode,order,count,activeset,next,changed,touchedPartitions);
660  }
661  //not sure if this works each time
662  if(atoms[partition].index != oldPart){
663  i-=1;
664  }
665  }
666  } // end of BreakTies()
667 
668 
669  void CreateSinglePartition(unsigned int nAtoms,
670  int *order,
671  int *count,
672  canon_atom *atoms);
673 
674  void ActivatePartitions(unsigned int nAtoms,
675  int *order,
676  int *count,
677  int &activeset,int *next,
678  int *changed);
679 
680  void rankMolAtoms(const ROMol &mol,std::vector<unsigned int> &res,
681  bool breakTies=true,
682  bool includeChirality=true,
683  bool includeIsotopes=true);
684 
685  void rankFragmentAtoms(const ROMol &mol,std::vector<unsigned int> &res,
686  const boost::dynamic_bitset<> &atomsInPlay,
687  const boost::dynamic_bitset<> &bondsInPlay,
688  const std::vector<std::string> *atomSymbols=NULL,
689  bool breakTies=true,
690  bool includeChirality=true,
691  bool includeIsotopes=true);
692 
693  void chiralRankMolAtoms(const ROMol &mol,std::vector<unsigned int> &res);
694 
695  void initCanonAtoms(const ROMol &mol,std::vector<Canon::canon_atom> &atoms,
696  bool includeChirality=true);
697 
698 
699  } // end of Canon namespace
700 } // end of RDKit namespace
int operator()(int i, int j) const
Definition: new_canon.h:475
const std::string * p_symbol
Definition: new_canon.h:83
void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
unsigned int neighborNum
Definition: new_canon.h:84
const boost::dynamic_bitset * dp_bondsInPlay
Definition: new_canon.h:111
Bond::BondType bondType
Definition: new_canon.h:30
void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true)
void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
unsigned int totalNumHs
Definition: new_canon.h:79
unsigned int nbrIdx
Definition: new_canon.h:33
void CreateSinglePartition(unsigned int nAtoms, int *order, int *count, canon_atom *atoms)
void ActivatePartitions(unsigned int nAtoms, int *order, int *count, int &activeset, int *next, int *changed)
const std::string molAtomMapNumber
Defines the primary molecule class ROMol as well as associated typedefs.
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition: hanoiSort.h:126
BondStereo
the nature of the bond&#39;s stereochem (for cis/trans)
Definition: Bond.h:93
unsigned int getNumAtoms(bool onlyExplicit=1) const
returns our number of atoms
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=NULL, const boost::dynamic_bitset<> *bondsInPlay=NULL)
Definition: new_canon.h:357
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc)
Definition: new_canon.h:37
BondType
the type of Bond
Definition: Bond.h:55
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:105
const boost::dynamic_bitset * dp_bondsInPlay
Definition: new_canon.h:346
std::vector< bondholder > bonds
Definition: new_canon.h:86
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition: new_canon.h:54
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition: new_canon.h:473
const unsigned int ATNUM_CLASS_OFFSET
Definition: new_canon.h:405
unsigned int revistedNeighbors
Definition: new_canon.h:85
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=NULL, const boost::dynamic_bitset<> *bondsInPlay=NULL)
Definition: new_canon.h:167
int getAtomicNum() const
returns our atomic number
Definition: Atom.h:113
bool operator<(const bondholder &o) const
Definition: new_canon.h:47
const std::string _CIPCode
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
int operator()(int i, int j) const
Definition: new_canon.h:364
unsigned int nbrSymClass
Definition: new_canon.h:32
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:612
const boost::dynamic_bitset * dp_bondsInPlay
Definition: new_canon.h:162
class for representing a bond
Definition: Bond.h:46
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition: new_canon.h:62
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:512
#define PRECONDITION(expr, mess)
Definition: Invariant.h:119
void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int > > &result)
unsigned int degree
Definition: new_canon.h:78
void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols=NULL, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true)
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc)
Definition: new_canon.h:42
void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true)
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=NULL, const boost::dynamic_bitset<> *bondsInPlay=NULL)
Definition: new_canon.h:116
unsigned int bondStereo
Definition: new_canon.h:31
The class for representing atoms.
Definition: Atom.h:67
Canon::canon_atom * dp_atoms
Definition: new_canon.h:344