RDKit
Open-source cheminformatics and machine learning.
new_canon.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Greg Landrum
3 // Adapted from pseudo-code from Roger Sayle
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 
12 #include <RDGeneral/export.h>
13 #include <RDGeneral/hanoiSort.h>
14 #include <GraphMol/ROMol.h>
15 #include <GraphMol/RingInfo.h>
17 #include <boost/cstdint.hpp>
18 #include <boost/foreach.hpp>
19 #include <boost/dynamic_bitset.hpp>
21 #include <cstring>
22 #include <iostream>
23 #include <cassert>
24 #include <cstring>
25 #include <vector>
26 
27 //#define VERBOSE_CANON 1
28 
29 namespace RDKit {
30 namespace Canon {
31 
34  unsigned int bondStereo;
35  unsigned int nbrSymClass;
36  unsigned int nbrIdx;
38  : bondType(Bond::UNSPECIFIED),
39  bondStereo(static_cast<unsigned int>(Bond::STEREONONE)),
40  nbrSymClass(0),
41  nbrIdx(0){};
42  bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni,
43  unsigned int nsc)
44  : bondType(bt),
45  bondStereo(static_cast<unsigned int>(bs)),
46  nbrSymClass(nsc),
47  nbrIdx(ni){};
48  bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni,
49  unsigned int nsc)
50  : bondType(bt), bondStereo(bs), nbrSymClass(nsc), nbrIdx(ni){};
51  bool operator<(const bondholder &o) const {
52  if (bondType != o.bondType) return bondType < o.bondType;
53  if (bondStereo != o.bondStereo) return bondStereo < o.bondStereo;
54  return nbrSymClass < o.nbrSymClass;
55  }
56  static bool greater(const bondholder &lhs, const bondholder &rhs) {
57  if (lhs.bondType != rhs.bondType) return lhs.bondType > rhs.bondType;
58  if (lhs.bondStereo != rhs.bondStereo)
59  return lhs.bondStereo > rhs.bondStereo;
60  return lhs.nbrSymClass > rhs.nbrSymClass;
61  }
62 
63  static int compare(const bondholder &x, const bondholder &y,
64  unsigned int div = 1) {
65  if (x.bondType < y.bondType)
66  return -1;
67  else if (x.bondType > y.bondType)
68  return 1;
69  if (x.bondStereo < y.bondStereo)
70  return -1;
71  else if (x.bondStereo > y.bondStereo)
72  return 1;
73  return x.nbrSymClass / div - y.nbrSymClass / div;
74  }
75 };
76 
78  const Atom *atom;
79  int index;
80  unsigned int degree;
81  unsigned int totalNumHs;
82  bool hasRingNbr;
84  int *nbrIds;
85  const std::string *p_symbol; // if provided, this is used to order atoms
86  std::vector<int> neighborNum;
87  std::vector<int> revistedNeighbors;
88  std::vector<bondholder> bonds;
89 
91  : atom(NULL),
92  index(-1),
93  degree(0),
94  totalNumHs(0),
95  hasRingNbr(false),
96  isRingStereoAtom(false),
97  nbrIds(NULL),
98  p_symbol(NULL){};
99 };
100 
101 RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborIndex(canon_atom *atoms, std::vector<bondholder> &nbrs);
102 
104  canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
105  std::vector<std::pair<unsigned int, unsigned int> > &result);
106 
107 /*
108  * Different types of atom compare functions:
109  *
110  * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting
111  *dependent chirality
112  * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing
113  *highly symmetrical graphs/molecules
114  * - AtomCompareFunctor: Basic atom compare function which also allows to
115  *include neighbors within the ranking
116  */
117 
119  public:
121  const ROMol *dp_mol;
122  const boost::dynamic_bitset<> *dp_atomsInPlay, *dp_bondsInPlay;
123 
125  : dp_atoms(NULL),
126  dp_mol(NULL),
127  dp_atomsInPlay(NULL),
128  dp_bondsInPlay(NULL){};
130  Canon::canon_atom *atoms, const ROMol &m,
131  const boost::dynamic_bitset<> *atomsInPlay = NULL,
132  const boost::dynamic_bitset<> *bondsInPlay = NULL)
133  : dp_atoms(atoms),
134  dp_mol(&m),
135  dp_atomsInPlay(atomsInPlay),
136  dp_bondsInPlay(bondsInPlay){};
137  int operator()(int i, int j) const {
138  PRECONDITION(dp_atoms, "no atoms");
139  PRECONDITION(dp_mol, "no molecule");
140  PRECONDITION(i != j, "bad call");
141  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
142  return 0;
143  }
144 
145  if ((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay) {
146  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
147  }
148  if ((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay) {
149  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
150  }
151  for (unsigned int ii = 0;
152  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
153  int cmp =
154  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
155  if (cmp) return cmp;
156  }
157 
158  std::vector<std::pair<unsigned int, unsigned int> > swapsi;
159  std::vector<std::pair<unsigned int, unsigned int> > swapsj;
160  if ((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay) {
161  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds, i, swapsi);
162  }
163  if ((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay) {
164  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds, j, swapsj);
165  }
166  for (unsigned int ii = 0; ii < swapsi.size() && ii < swapsj.size(); ++ii) {
167  int cmp = swapsi[ii].second - swapsj[ii].second;
168  if (cmp) return cmp;
169  }
170  return 0;
171  }
172 };
173 
175  public:
177  const ROMol *dp_mol;
178  const boost::dynamic_bitset<> *dp_atomsInPlay, *dp_bondsInPlay;
179 
181  : dp_atoms(NULL),
182  dp_mol(NULL),
183  dp_atomsInPlay(NULL),
184  dp_bondsInPlay(NULL){};
186  Canon::canon_atom *atoms, const ROMol &m,
187  const boost::dynamic_bitset<> *atomsInPlay = NULL,
188  const boost::dynamic_bitset<> *bondsInPlay = NULL)
189  : dp_atoms(atoms),
190  dp_mol(&m),
191  dp_atomsInPlay(atomsInPlay),
192  dp_bondsInPlay(bondsInPlay){};
193  int operator()(int i, int j) const {
194  PRECONDITION(dp_atoms, "no atoms");
195  PRECONDITION(dp_mol, "no molecule");
196  PRECONDITION(i != j, "bad call");
197  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
198  return 0;
199  }
200 
201  if (dp_atoms[i].neighborNum < dp_atoms[j].neighborNum) {
202  return -1;
203  } else if (dp_atoms[i].neighborNum > dp_atoms[j].neighborNum) {
204  return 1;
205  }
206 
207  if (dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors) {
208  return -1;
209  } else if (dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors) {
210  return 1;
211  }
212 
213  if ((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay) {
214  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
215  }
216  if ((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay) {
217  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
218  }
219  for (unsigned int ii = 0;
220  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
221  int cmp =
222  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
223  if (cmp) return cmp;
224  }
225 
226  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
227  return -1;
228  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
229  return 1;
230  }
231  return 0;
232  }
233 };
234 
236  unsigned int getAtomRingNbrCode(unsigned int i) const {
237  if (!dp_atoms[i].hasRingNbr) return 0;
238 
239  int *nbrs = dp_atoms[i].nbrIds;
240  unsigned int code = 0;
241  for (unsigned j = 0; j < dp_atoms[i].degree; ++j) {
242  if (dp_atoms[nbrs[j]].isRingStereoAtom) {
243  code += dp_atoms[nbrs[j]].index * 10000 + 1; // j;
244  }
245  }
246  return code;
247  }
248 
249  int basecomp(int i, int j) const {
250  PRECONDITION(dp_atoms, "no atoms");
251  unsigned int ivi, ivj;
252 
253  // always start with the current class:
254  ivi = dp_atoms[i].index;
255  ivj = dp_atoms[j].index;
256  if (ivi < ivj)
257  return -1;
258  else if (ivi > ivj)
259  return 1;
260 
261  // use the atom-mapping numbers if they were assigned
262  /* boost::any_cast FILED here:
263  std::string molAtomMapNumber_i="";
264  std::string molAtomMapNumber_j="";
265  */
266  int molAtomMapNumber_i = 0;
267  int molAtomMapNumber_j = 0;
268  dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,
269  molAtomMapNumber_i);
270  dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,
271  molAtomMapNumber_j);
272  if (molAtomMapNumber_i < molAtomMapNumber_j)
273  return -1;
274  else if (molAtomMapNumber_i > molAtomMapNumber_j)
275  return 1;
276 
277  // start by comparing degree
278  ivi = dp_atoms[i].degree;
279  ivj = dp_atoms[j].degree;
280  if (ivi < ivj)
281  return -1;
282  else if (ivi > ivj)
283  return 1;
284 
285  if (dp_atoms[i].p_symbol && dp_atoms[j].p_symbol) {
286  if (*(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol))
287  return -1;
288  else if (*(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol))
289  return 1;
290  else
291  return 0;
292  }
293 
294  // move onto atomic number
295  ivi = dp_atoms[i].atom->getAtomicNum();
296  ivj = dp_atoms[j].atom->getAtomicNum();
297  if (ivi < ivj)
298  return -1;
299  else if (ivi > ivj)
300  return 1;
301 
302  // isotopes if we're using them
303  if (df_useIsotopes) {
304  ivi = dp_atoms[i].atom->getIsotope();
305  ivj = dp_atoms[j].atom->getIsotope();
306  if (ivi < ivj)
307  return -1;
308  else if (ivi > ivj)
309  return 1;
310  }
311 
312  // nHs
313  ivi = dp_atoms[i].totalNumHs;
314  ivj = dp_atoms[j].totalNumHs;
315  if (ivi < ivj)
316  return -1;
317  else if (ivi > ivj)
318  return 1;
319 
320  // charge
321  ivi = dp_atoms[i].atom->getFormalCharge();
322  ivj = dp_atoms[j].atom->getFormalCharge();
323  if (ivi < ivj)
324  return -1;
325  else if (ivi > ivj)
326  return 1;
327 
328  // chirality if we're using it
329  if (df_useChirality) {
330  // first atom stereochem:
331  ivi = 0;
332  ivj = 0;
333  std::string cipCode;
334  if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
335  cipCode)) {
336  ivi = cipCode == "R" ? 2 : 1;
337  }
338  if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
339  cipCode)) {
340  ivj = cipCode == "R" ? 2 : 1;
341  }
342  if (ivi < ivj)
343  return -1;
344  else if (ivi > ivj)
345  return 1;
346 
347  // can't actually use values here, because they are arbitrary
348  ivi = dp_atoms[i].atom->getChiralTag() != 0;
349  ivj = dp_atoms[j].atom->getChiralTag() != 0;
350  if (ivi < ivj)
351  return -1;
352  else if (ivi > ivj)
353  return 1;
354  }
355 
356  if (df_useChiralityRings) {
357  // ring stereochemistry
358  ivi = getAtomRingNbrCode(i);
359  ivj = getAtomRingNbrCode(j);
360  if (ivi < ivj)
361  return -1;
362  else if (ivi > ivj)
363  return 1;
364  // bond stereo is taken care of in the neighborhood comparison
365  }
366  return 0;
367  }
368 
369  public:
371  const ROMol *dp_mol;
372  const boost::dynamic_bitset<> *dp_atomsInPlay, *dp_bondsInPlay;
377 
379  : dp_atoms(NULL),
380  dp_mol(NULL),
381  dp_atomsInPlay(NULL),
382  dp_bondsInPlay(NULL),
383  df_useNbrs(false),
384  df_useIsotopes(true),
385  df_useChirality(true),
386  df_useChiralityRings(true){};
388  const boost::dynamic_bitset<> *atomsInPlay = NULL,
389  const boost::dynamic_bitset<> *bondsInPlay = NULL)
390  : dp_atoms(atoms),
391  dp_mol(&m),
392  dp_atomsInPlay(atomsInPlay),
393  dp_bondsInPlay(bondsInPlay),
394  df_useNbrs(false),
395  df_useIsotopes(true),
396  df_useChirality(true),
397  df_useChiralityRings(true){};
398  int operator()(int i, int j) const {
399  PRECONDITION(dp_atoms, "no atoms");
400  PRECONDITION(dp_mol, "no molecule");
401  PRECONDITION(i != j, "bad call");
402  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
403  return 0;
404  }
405 
406  int v = basecomp(i, j);
407  if (v) {
408  return v;
409  }
410 
411  if (df_useNbrs) {
412  if ((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay) {
413  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
414  }
415  if ((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay) {
416  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
417  }
418 
419  for (unsigned int ii = 0;
420  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
421  ++ii) {
422  int cmp =
423  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
424  if (cmp) return cmp;
425  }
426 
427  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
428  return -1;
429  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
430  return 1;
431  }
432  }
433  return 0;
434  }
435 };
436 
437 /*
438  * A compare function to discriminate chiral atoms, similar to the CIP rules.
439  * This functionality is currently not used.
440  */
441 
442 const unsigned int ATNUM_CLASS_OFFSET = 10000;
444  void getAtomNeighborhood(std::vector<bondholder> &nbrs) const {
445  for (unsigned j = 0; j < nbrs.size(); ++j) {
446  unsigned int nbrIdx = nbrs[j].nbrIdx;
447  if (nbrIdx == ATNUM_CLASS_OFFSET) {
448  // Ignore the Hs
449  continue;
450  }
451  const Atom *nbr = dp_atoms[nbrIdx].atom;
452  nbrs[j].nbrSymClass =
453  nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + dp_atoms[nbrIdx].index + 1;
454  }
455  std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
456  // FIX: don't want to be doing this long-term
457  }
458 
459  int basecomp(int i, int j) const {
460  PRECONDITION(dp_atoms, "no atoms");
461  unsigned int ivi, ivj;
462 
463  // always start with the current class:
464  ivi = dp_atoms[i].index;
465  ivj = dp_atoms[j].index;
466  if (ivi < ivj)
467  return -1;
468  else if (ivi > ivj)
469  return 1;
470 
471  // move onto atomic number
472  ivi = dp_atoms[i].atom->getAtomicNum();
473  ivj = dp_atoms[j].atom->getAtomicNum();
474  if (ivi < ivj)
475  return -1;
476  else if (ivi > ivj)
477  return 1;
478 
479  // isotopes:
480  ivi = dp_atoms[i].atom->getIsotope();
481  ivj = dp_atoms[j].atom->getIsotope();
482  if (ivi < ivj)
483  return -1;
484  else if (ivi > ivj)
485  return 1;
486 
487  // atom stereochem:
488  ivi = 0;
489  ivj = 0;
490  std::string cipCode;
491  if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
492  cipCode)) {
493  ivi = cipCode == "R" ? 2 : 1;
494  }
495  if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
496  cipCode)) {
497  ivj = cipCode == "R" ? 2 : 1;
498  }
499  if (ivi < ivj)
500  return -1;
501  else if (ivi > ivj)
502  return 1;
503 
504  // bond stereo is taken care of in the neighborhood comparison
505  return 0;
506  }
507 
508  public:
510  const ROMol *dp_mol;
513  : dp_atoms(NULL), dp_mol(NULL), df_useNbrs(false){};
515  : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false){};
516  int operator()(int i, int j) const {
517  PRECONDITION(dp_atoms, "no atoms");
518  PRECONDITION(dp_mol, "no molecule");
519  PRECONDITION(i != j, "bad call");
520  int v = basecomp(i, j);
521  if (v) return v;
522 
523  if (df_useNbrs) {
524  getAtomNeighborhood(dp_atoms[i].bonds);
525  getAtomNeighborhood(dp_atoms[j].bonds);
526 
527  // we do two passes through the neighbor lists. The first just uses the
528  // atomic numbers (by passing the optional 10000 to bondholder::compare),
529  // the second takes the already-computed index into account
530  for (unsigned int ii = 0;
531  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
532  ++ii) {
533  int cmp = bondholder::compare(
534  dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii], ATNUM_CLASS_OFFSET);
535  if (cmp) return cmp;
536  }
537  for (unsigned int ii = 0;
538  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
539  ++ii) {
540  int cmp =
541  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
542  if (cmp) return cmp;
543  }
544  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
545  return -1;
546  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
547  return 1;
548  }
549  }
550  return 0;
551  }
552 };
553 
554 /*
555  * Basic canonicalization function to organize the partitions which will be
556  * sorted next.
557  * */
558 
559 template <typename CompareFunc>
560 void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
561  int mode, int *order, int *count, int &activeset,
562  int *next, int *changed, char *touchedPartitions) {
563  unsigned int nAtoms = mol.getNumAtoms();
564  int partition;
565  int symclass = 0;
566  int *start;
567  int offset;
568  int index;
569  int len;
570  int i;
571  // std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
572 
573  // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
574  while (activeset != -1) {
575  // std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
576  // std::cerr<<" next: ";
577  // for(unsigned int ii=0;ii<nAtoms;++ii){
578  // std::cerr<<ii<<":"<<next[ii]<<" ";
579  // }
580  // std::cerr<<std::endl;
581  // for(unsigned int ii=0;ii<nAtoms;++ii){
582  // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index:
583  // "<<atoms[order[ii]].index<<std::endl;
584  // }
585 
586  partition = activeset;
587  activeset = next[partition];
588  next[partition] = -2;
589 
590  len = count[partition];
591  offset = atoms[partition].index;
592  start = order + offset;
593  // std::cerr<<"\n\n**************************************************************"<<std::endl;
594  // std::cerr<<" sort - class:"<<atoms[partition].index<<" len: "<<len<<":";
595  // for(unsigned int ii=0;ii<len;++ii){
596  // std::cerr<<" "<<order[offset+ii]+1;
597  // }
598  // std::cerr<<std::endl;
599  // for(unsigned int ii=0;ii<nAtoms;++ii){
600  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
601  // "<<atoms[order[ii]].index<<std::endl;
602  // }
603  hanoisort(start, len, count, changed, compar);
604  // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
605  // std::cerr<<" result:";
606  // for(unsigned int ii=0;ii<nAtoms;++ii){
607  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
608  // "<<atoms[order[ii]].index<<std::endl;
609  // }
610  for (int k = 0; k < len; ++k) {
611  changed[start[k]] = 0;
612  }
613 
614  index = start[0];
615  // std::cerr<<" len:"<<len<<" index:"<<index<<"
616  // count:"<<count[index]<<std::endl;
617  for (i = count[index]; i < len; i++) {
618  index = start[i];
619  if (count[index]) symclass = offset + i;
620  atoms[index].index = symclass;
621  // std::cerr<<" "<<index+1<<"("<<symclass<<")";
622  // if(mode && (activeset<0 || count[index]>count[activeset]) ){
623  // activeset=index;
624  //}
625  for (unsigned j = 0; j < atoms[index].degree; ++j) {
626  changed[atoms[index].nbrIds[j]] = 1;
627  }
628  }
629  // std::cerr<<std::endl;
630 
631  if (mode) {
632  index = start[0];
633  for (i = count[index]; i < len; i++) {
634  index = start[i];
635  for (unsigned j = 0; j < atoms[index].degree; ++j) {
636  unsigned int nbor = atoms[index].nbrIds[j];
637  touchedPartitions[atoms[nbor].index] = 1;
638  }
639  }
640  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
641  if (touchedPartitions[ii]) {
642  partition = order[ii];
643  if ((count[partition] > 1) && (next[partition] == -2)) {
644  next[partition] = activeset;
645  activeset = partition;
646  }
647  touchedPartitions[ii] = 0;
648  }
649  }
650  }
651  }
652 } // end of RefinePartitions()
653 
654 template <typename CompareFunc>
655 void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
656  int mode, int *order, int *count, int &activeset, int *next,
657  int *changed, char *touchedPartitions) {
658  unsigned int nAtoms = mol.getNumAtoms();
659  int partition;
660  int offset;
661  int index;
662  int len;
663  int oldPart = 0;
664 
665  for (unsigned int i = 0; i < nAtoms; i++) {
666  partition = order[i];
667  oldPart = atoms[partition].index;
668  while (count[partition] > 1) {
669  len = count[partition];
670  offset = atoms[partition].index + len - 1;
671  index = order[offset];
672  atoms[index].index = offset;
673  count[partition] = len - 1;
674  count[index] = 1;
675 
676  // test for ions, water molecules with no
677  if (atoms[index].degree < 1) {
678  continue;
679  }
680  for (unsigned j = 0; j < atoms[index].degree; ++j) {
681  unsigned int nbor = atoms[index].nbrIds[j];
682  touchedPartitions[atoms[nbor].index] = 1;
683  changed[nbor] = 1;
684  }
685 
686  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
687  if (touchedPartitions[ii]) {
688  int npart = order[ii];
689  if ((count[npart] > 1) && (next[npart] == -2)) {
690  next[npart] = activeset;
691  activeset = npart;
692  }
693  touchedPartitions[ii] = 0;
694  }
695  }
696  RefinePartitions(mol, atoms, compar, mode, order, count, activeset, next,
697  changed, touchedPartitions);
698  }
699  // not sure if this works each time
700  if (atoms[partition].index != oldPart) {
701  i -= 1;
702  }
703  }
704 } // end of BreakTies()
705 
706 RDKIT_GRAPHMOL_EXPORT void CreateSinglePartition(unsigned int nAtoms, int *order, int *count,
707  canon_atom *atoms);
708 
709 RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order, int *count,
710  int &activeset, int *next, int *changed);
711 
712 RDKIT_GRAPHMOL_EXPORT void rankMolAtoms(const ROMol &mol, std::vector<unsigned int> &res,
713  bool breakTies = true, bool includeChirality = true,
714  bool includeIsotopes = true);
715 
716 RDKIT_GRAPHMOL_EXPORT void rankFragmentAtoms(const ROMol &mol, std::vector<unsigned int> &res,
717  const boost::dynamic_bitset<> &atomsInPlay,
718  const boost::dynamic_bitset<> &bondsInPlay,
719  const std::vector<std::string> *atomSymbols = NULL,
720  bool breakTies = true, bool includeChirality = true,
721  bool includeIsotopes = true);
722 
723 RDKIT_GRAPHMOL_EXPORT void chiralRankMolAtoms(const ROMol &mol, std::vector<unsigned int> &res);
724 
725 RDKIT_GRAPHMOL_EXPORT void initCanonAtoms(const ROMol &mol, std::vector<Canon::canon_atom> &atoms,
726  bool includeChirality = true);
727 
728 } // end of Canon namespace
729 } // end of RDKit namespace
bool operator<(const bondholder &o) const
Definition: new_canon.h:51
RDKIT_RDGENERAL_EXPORT const std::string molAtomMapNumber
const std::string * p_symbol
Definition: new_canon.h:85
const boost::dynamic_bitset * dp_bondsInPlay
Definition: new_canon.h:122
int operator()(int i, int j) const
Definition: new_canon.h:516
Bond::BondType bondType
Definition: new_canon.h:33
std::vector< int > neighborNum
Definition: new_canon.h:86
unsigned int getNumAtoms(bool onlyExplicit=1) const
returns our number of atoms
unsigned int totalNumHs
Definition: new_canon.h:81
unsigned int nbrIdx
Definition: new_canon.h:36
Defines the primary molecule class ROMol as well as associated typedefs.
RDKIT_RDGENERAL_EXPORT const std::string _CIPCode
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition: hanoiSort.h:141
BondStereo
the nature of the bond&#39;s stereochem (for cis/trans)
Definition: Bond.h:95
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=NULL, const boost::dynamic_bitset<> *bondsInPlay=NULL)
Definition: new_canon.h:387
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc)
Definition: new_canon.h:42
BondType
the type of Bond
Definition: Bond.h:56
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
int operator()(int i, int j) const
Definition: new_canon.h:398
const boost::dynamic_bitset * dp_bondsInPlay
Definition: new_canon.h:372
std::vector< bondholder > bonds
Definition: new_canon.h:88
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition: new_canon.h:56
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:294
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition: new_canon.h:514
RDKIT_GRAPHMOL_EXPORT void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true)
const unsigned int ATNUM_CLASS_OFFSET
Definition: new_canon.h:442
RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order, int *count, int &activeset, int *next, int *changed)
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=NULL, const boost::dynamic_bitset<> *bondsInPlay=NULL)
Definition: new_canon.h:185
RDKIT_GRAPHMOL_EXPORT void CreateSinglePartition(unsigned int nAtoms, int *order, int *count, canon_atom *atoms)
RDKIT_GRAPHMOL_EXPORT void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols=NULL, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true)
Std stuff.
Definition: Atom.h:30
unsigned int nbrSymClass
Definition: new_canon.h:35
int getAtomicNum() const
returns our atomic number
Definition: Atom.h:115
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:655
const boost::dynamic_bitset * dp_bondsInPlay
Definition: new_canon.h:178
class for representing a bond
Definition: Bond.h:47
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition: new_canon.h:63
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:560
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int > > &result)
#define PRECONDITION(expr, mess)
Definition: Invariant.h:108
unsigned int degree
Definition: new_canon.h:80
std::vector< int > revistedNeighbors
Definition: new_canon.h:87
RDKIT_GRAPHMOL_EXPORT void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true)
RDKIT_GRAPHMOL_EXPORT void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc)
Definition: new_canon.h:48
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=NULL, const boost::dynamic_bitset<> *bondsInPlay=NULL)
Definition: new_canon.h:129
unsigned int bondStereo
Definition: new_canon.h:34
The class for representing atoms.
Definition: Atom.h:69
Canon::canon_atom * dp_atoms
Definition: new_canon.h:370