RDKit
Open-source cheminformatics and machine learning.
RGroupDecompData.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2017 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef RGROUP_DECOMP_DATA
11 #define RGROUP_DECOMP_DATA
12 
13 #include "RGroupCore.h"
14 #include "RGroupDecomp.h"
15 #include "RGroupMatch.h"
16 #include "RGroupScore.h"
17 #include "RGroupFingerprintScore.h"
18 #include "RGroupGa.h"
19 #include <vector>
20 #include <map>
21 
22 // #define VERBOSE 1
23 
24 namespace RDKit {
26  // cores, keyed by core index (the constructors fill keys 0..n-1)
27  std::map<int, RCore> cores;
28  std::map<std::string, int> newCores; // new "cores" found along the way
30  // this caches the running product of permutations
31  // across calls to process()
32  size_t permutationProduct = 1;
33  // this caches the size of the previous matches vector
34  // such that the size of the current chunk can be inferred
35  size_t previousMatchSize = 0;
36  // the default for Greedy/GreedyChunks is keeping only the best
37  // permutation after each call to process()
38  bool prunePermutations = true;
40 
// matches[mol_idx] == vector of potential matches
41  std::vector<std::vector<RGroupMatch>> matches;
// labels iterated by GetCurrentBestPermutation() and passed to the scoring
// calls
42  std::set<int> labels;
// index of the selected match for each molecule; prune() uses it to pick the
// single match that is kept
43  std::vector<size_t> permutation;
// first permutation position that scoreFromPrunedData() still has to handle;
// presumably everything before it is already in the pruned score data
44  unsigned int pruneLength = 0U;
46  std::map<int, std::vector<int>> userLabels;
47 
48  std::vector<int> processedRlabels;
49 
// cleared and rebuilt by relabel(): label used during matching -> final
// R-label
50  std::map<int, int> finalRlabelMapping;
52 
// Single-core constructor: stores a copy of inputCore as core 0, takes the
// parameters by move, then prepares the core via prepareCores().
// NOTE(review): the second parameter line (inputParams) is not part of this
// listing.
53  RGroupDecompData(const RWMol &inputCore,
55  : params(std::move(inputParams)) {
56  cores[0] = RCore(inputCore);
57  prepareCores();
58  }
59 
// Multi-core constructor: stores inputCores[i] as core i (in input order),
// takes the parameters by move, then prepares all cores via prepareCores().
// Each pointer in inputCores is dereferenced without a null check.
// NOTE(review): the second parameter line (inputParams) is not part of this
// listing.
60  RGroupDecompData(const std::vector<ROMOL_SPTR> &inputCores,
62  : params(std::move(inputParams)) {
// the size_t index is converted to the map's int key
63  for (size_t i = 0; i < inputCores.size(); ++i) {
64  cores[i] = RCore(*inputCores[i]);
65  }
66  prepareCores();
67  }
68 
69  void prepareCores() {
70  for (auto &core : cores) {
71  RWMol *alignCore = core.first ? cores[0].core.get() : nullptr;
72  CHECK_INVARIANT(params.prepareCore(*core.second.core, alignCore),
73  "Could not prepare at least one core");
74  core.second.init();
75  core.second.labelledCore.reset(new RWMol(*core.second.core));
76  }
77  }
78 
// Writes rlabel onto atom in up to three forms: as the atom map number, via
// setAtomRLabel() (an "R<rlabel>" string is built alongside it), and as the
// isotope rlabel + 1.
// NOTE(review): the conditions guarding each form are not part of this
// listing.
// NOTE(review): the precondition only rejects 0, although its message says
// labels must be > 0 - negative values pass the check.
79  void setRlabel(Atom *atom, int rlabel) {
80  PRECONDITION(rlabel != 0, "RLabels must be >0");
82  atom->setAtomMapNum(rlabel);
83  }
84 
86  std::string dLabel = "R" + std::to_string(rlabel);
88  setAtomRLabel(atom, rlabel);
89  }
90 
92  atom->setIsotope(rlabel + 1);
93  }
94  }
95 
// Scores a permutation, handling positions [pruneLength, permutation.size())
// separately from the positions before pruneLength.
// permutation: the permutation to score (this parameter shadows the member of
//   the same name).
// reset: when false, pruneLength is advanced to permutation.size() before
//   returning; when true, pruneLength is left unchanged.
// NOTE(review): several statements of this function (including the calls that
// receive the position arguments below) are not part of this listing.
96  double scoreFromPrunedData(const std::vector<size_t> &permutation,
97  bool reset = true) {
100  "Scoring method is not fingerprint variance!");
101 
103  "Illegal permutation prune length");
// this branch is taken only while the permutation is shorter than 1.5x the
// already-pruned length
104  if (permutation.size() < pruneLength * 1.5) {
105  for (unsigned int pos = pruneLength; pos < permutation.size(); ++pos) {
107  pos, permutation[pos], matches, labels);
108  }
109  double score =
111  if (reset) {
// second pass over the same positions; presumably undoes the first pass
112  for (unsigned int pos = pruneLength; pos < permutation.size(); ++pos) {
114  pos, permutation[pos], matches, labels);
115  }
116  } else {
117  pruneLength = permutation.size();
118  }
119  return score;
120  } else {
121  if (reset) {
123  } else {
125  pruneLength = permutation.size();
128  }
129  }
130  }
131 
// For each molecule covered by `permutation`, replaces its candidate list in
// `matches` with the single match selected by the permutation.
// Requires permutation.size() <= matches.size().
// NOTE(review): listing lines 143-145 are absent here.
132  void prune() { // prune all but the current "best" permutation of matches
133  PRECONDITION(permutation.size() <= matches.size(), "permutation.size() should be <= matches.size()");
// permutation[k] refers to matches[k + offset], i.e. the permutation covers
// the last permutation.size() entries of matches
134  size_t offset = matches.size() - permutation.size();
135  for (size_t mol_idx = 0; mol_idx < permutation.size(); ++mol_idx) {
136  std::vector<RGroupMatch> keepVector;
137  size_t mi = mol_idx + offset;
// at() throws if the stored index is out of range for this molecule
138  keepVector.push_back(matches[mi].at(permutation[mol_idx]));
139  matches[mi] = keepVector;
140  }
141 
// every kept match now sits at index 0 of its vector
142  permutation = std::vector<size_t>(permutation.size(), 0);
146  }
147  }
148 
149  // Return the RGroups with the current "best" permutation
150  // of matches.
// One RGroupMatch copy is returned per entry of `matches`. Labels whose
// R-groups are hydrogen-only across the relevant results are erased from the
// returned copies (see the rules below).
151  std::vector<RGroupMatch> GetCurrentBestPermutation() const {
// NOTE(review): the initializer of this flag is absent from this listing
152  const bool removeAllHydrogenRGroups =
155 
156  std::vector<RGroupMatch> results; // std::map<int, RGroup> > result;
// when permutation is shorter than matches, index 0 is used for every
// molecule instead of permutation[i]
157  bool isPruned = (permutation.size() < matches.size());
158  for (size_t i = 0; i < matches.size(); ++i) {
159  size_t pi = (isPruned ? 0 : permutation.at(i));
160  results.push_back(matches[i].at(pi));
161  }
162 
163  // * if a dynamically-added RGroup (i.e., when onlyMatchAtRGroups=false)
164  // is all hydrogens, remove it
165  // * if a user-defined RGroup is all hydrogens and either
166  // params.removeAllHydrogenRGroups==true or
167  // params.removeAllHydrogenRGroupsAndLabels==true, remove it
168 
169  // This logic is a bit tricky, find all labels that have common cores
170  // and analyze those sets independently.
171  // i.e. if core 1 doesn't have R1 then don't analyze it when looking
172  // at label 1
173  std::map<int, std::set<int>> labelCores; // map from label->cores
174  std::set<int> coresVisited;
// collect, once per core that appears in the results, the labels that core
// carries
175  for (auto &position : results) {
176  int core_idx = position.core_idx;
177  if (coresVisited.find(core_idx) == coresVisited.end()) {
178  coresVisited.insert(core_idx);
179  auto core = cores.find(core_idx);
180  if (core != cores.end()) {
181  for (auto rlabels : getRlabels(*core->second.core)) {
182  int rlabel = rlabels.first;
183  labelCores[rlabel].insert(core_idx);
184  }
185  }
186  }
187  }
188 
189  for (int label : labels) {
// positive labels are only examined when removeAllHydrogenRGroups is set
190  if (label > 0 && !removeAllHydrogenRGroups) {
191  continue;
192  }
193  bool allH = true;
// the label stays unless some result, whose core carries this label, has a
// non-hydrogen R-group for it
194  for (auto &position : results) {
195  R_DECOMP::const_iterator rgroup = position.rgroups.find(label);
196  bool labelHasCore = labelCores[label].find(position.core_idx) !=
197  labelCores[label].end();
198  if (labelHasCore && rgroup != position.rgroups.end() &&
199  !rgroup->second->is_hydrogen) {
200  allH = false;
201  break;
202  }
203  }
204 
205  if (allH) {
206  for (auto &position : results) {
207  position.rgroups.erase(label);
208  }
209  }
210  }
211  return results;
212  }
213 
// Keeps track of the R-label numbers that are already taken and hands out
// the lowest free positive label on request.
class UsedLabels {
 public:
  // All labels reserved so far (any sign; only positive ones affect next()).
  std::set<int> labels_used;

  // Reserves rlabel.
  // Returns true if the label was free, false if it was already reserved.
  bool add(int rlabel) {
    // insert() already reports whether the key was new, so no separate
    // find() is needed.
    return labels_used.insert(rlabel).second;
  }

  // Reserves and returns the smallest label >= 1 that is not yet reserved.
  int next() {
    int candidate = 1;
    // std::set iterates in ascending order: walk the consecutive run that
    // starts at 1 and stop at the first gap.
    for (auto it = labels_used.lower_bound(1);
         it != labels_used.end() && *it == candidate; ++it) {
      ++candidate;
    }
    labels_used.insert(candidate);
    return candidate;
  }
};
234 
235  void addCoreUserLabels(const RWMol &core, std::set<int> &userLabels) {
236  auto atoms = getRlabels(core);
237  for (const auto &p : atoms) {
238  if (p.first > 0) {
239  userLabels.insert(p.first);
240  }
241  }
242  }
243 
244  void addAtoms(RWMol &mol,
245  const std::vector<std::pair<Atom *, Atom *>> &atomsToAdd) {
246  for (const auto &i : atomsToAdd) {
247  mol.addAtom(i.second, false, true);
248  mol.addBond(i.first, i.second, Bond::SINGLE);
249  if (mol.getNumConformers()) {
250  MolOps::setTerminalAtomCoords(mol, i.second->getIdx(),
251  i.first->getIdx());
252  }
253  }
254  }
255 
// Assigns final R-labels to the labelled atoms of `core`.
// core: molecule to relabel; dummy atoms may be appended to it.
// mappings: in/out map from the label found on the core to the final R-label.
// used_labels: in/out registry of final labels already taken.
// indexLabels: non-user labels that should receive a final label.
// extraAtomRLabels: per label, one entry for each additional dummy atom to
//   attach to that label's atom.
// NOTE(review): listing lines 316 and 337 are absent here.
256  void relabelCore(RWMol &core, std::map<int, int> &mappings,
257  UsedLabels &used_labels, const std::set<int> &indexLabels,
258  const std::map<int, std::vector<int>> &extraAtomRLabels) {
259  // Now remap to proper rlabel ids
260  // if labels are positive, they come from User labels
261  // if they are negative, they come from indices and should be
262  // numbered *after* the user labels.
263  //
264  // Some indices are attached to multiple bonds,
265  // these rlabels should be incrementally added last
266  std::map<int, Atom *> atoms = getRlabels(core);
267  // a core only has one labelled index
268  // a secondary structure extraAtomRLabels contains the number
269  // of bonds between this atom and the side chain
270 
271  // a sidechain atom has a vector of the attachments back to the
272  // core that takes the place of numBondsToRlabel
273 
// NOTE(review): bondsToCore is not referenced again in the visible lines
274  std::map<int, std::vector<int>> bondsToCore;
275  std::vector<std::pair<Atom *, Atom *>> atomsToAdd; // adds -R if necessary
276 
277  // Deal with user supplied labels
278  for (const auto &rlabels : atoms) {
279  int userLabel = rlabels.first;
280  if (userLabel < 0) {
281  continue; // not a user specified label
282  }
283  Atom *atom = rlabels.second;
// user labels keep their own number as the final label
284  mappings[userLabel] = userLabel;
285  used_labels.add(userLabel);
286 
287  if (atom->getAtomicNum() == 0 &&
288  atom->getDegree() == 1) { // add to existing dummy/rlabel
289  setRlabel(atom, userLabel);
290  } else { // adds new rlabel
291  auto *newAt = new Atom(0);
292  setRlabel(newAt, userLabel);
293  atomsToAdd.emplace_back(atom, newAt);
294  }
295  }
296 
297  // Deal with non-user supplied labels
298  for (auto newLabel : indexLabels) {
299  auto atm = atoms.find(newLabel);
300  if (atm == atoms.end()) {
301  continue;
302  }
303 
304  Atom *atom = atm->second;
305 
// reuse the final label if this label was already mapped (mappings is shared
// by the caller across calls), otherwise take the next free one
306  int rlabel;
307  auto mapping = mappings.find(newLabel);
308  if (mapping == mappings.end()) {
309  rlabel = used_labels.next();
310  mappings[newLabel] = rlabel;
311  } else {
312  rlabel = mapping->second;
313  }
314 
315  if (atom->getAtomicNum() == 0 &&
317  *atom)) { // add to dummy
318  setRlabel(atom, rlabel);
319  } else {
320  auto *newAt = new Atom(0);
321  setRlabel(newAt, rlabel);
322  atomsToAdd.emplace_back(atom, newAt);
323  }
324  }
325 
326  // Deal with multiple bonds to the same label
327  for (const auto &extraAtomRLabel : extraAtomRLabels) {
328  auto atm = atoms.find(extraAtomRLabel.first);
329  if (atm == atoms.end()) {
330  continue; // label not used in the rgroup
331  }
332  Atom *atom = atm->second;
333 
// one fresh label and one new dummy atom per extra entry; these labels are
// not recorded in mappings
334  for (size_t i = 0; i < extraAtomRLabel.second.size(); ++i) {
335  int rlabel = used_labels.next();
336  // Is this necessary?
338  atom->getAtomicNum() > 1,
339  "Multiple attachments to a dummy (or hydrogen) is weird.");
340  auto *newAt = new Atom(0);
341  setRlabel(newAt, rlabel);
342  atomsToAdd.emplace_back(atom, newAt);
343  }
344  }
345 
346  addAtoms(core, atomsToAdd);
// drop the RLABEL / RLABEL_TYPE properties from the originally labelled atoms
347  for (const auto &rlabels : atoms) {
348  auto atom = rlabels.second;
349  atom->clearProp(RLABEL);
350  atom->clearProp(RLABEL_TYPE);
351  }
352  core.updatePropertyCache(false); // this was github #1550
353  }
354 
// Applies the final R-labels in `mappings` to the side-chain molecule
// rgroup.combinedMol and marks the rgroup as labelled.
// rgroup: must already have a combinedMol (precondition).
// mappings: label stored in SIDECHAIN_RLABELS -> final R-label; every label
//   found on an atom must be present (invariant).
// A molecule that already carries the `done` property is not relabelled
// again.
// NOTE(review): listing lines 405, 425 and 427 are absent here.
355  void relabelRGroup(RGroupData &rgroup, const std::map<int, int> &mappings) {
356  PRECONDITION(rgroup.combinedMol.get(), "Unprocessed rgroup");
357 
358  RWMol &mol = *rgroup.combinedMol.get();
359 
360  if (rgroup.combinedMol->hasProp(done)) {
361  rgroup.labelled = true;
362  return;
363  }
364 
365  mol.setProp(done, true);
366  std::vector<std::pair<Atom *, Atom *>> atomsToAdd; // adds -R if necessary
// despite the name, the mapped value is the atom's atomic number
367  std::map<int, int> rLabelCoreIndexToAtomicWt;
368 
369  for (RWMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
370  ++atIt) {
371  Atom *atom = *atIt;
372  if (atom->hasProp(SIDECHAIN_RLABELS)) {
373  atom->setIsotope(0);
374  const std::vector<int> &rlabels =
375  atom->getProp<std::vector<int>>(SIDECHAIN_RLABELS);
376  // switch on atom mappings or rlabels....
377 
378  for (int rlabel : rlabels) {
379  auto label = mappings.find(rlabel);
380  CHECK_INVARIANT(label != mappings.end(), "Unprocessed mapping");
381 
// dummy atoms are labelled directly; core-index atoms are first turned into
// dummies; any other atom gets a new labelled dummy attached later
382  if (atom->getAtomicNum() == 0) {
383  setRlabel(atom, label->second);
384  } else if (atom->hasProp(RLABEL_CORE_INDEX)) {
385  atom->setAtomicNum(0);
386  setRlabel(atom, label->second);
387  } else {
388  auto *newAt = new Atom(0);
389  setRlabel(newAt, label->second);
390  atomsToAdd.emplace_back(atom, newAt);
391  }
392  }
393  }
394  if (atom->hasProp(RLABEL_CORE_INDEX)) {
395  // convert to dummy as we don't want to collapse hydrogens onto the core
396  // match
397  auto rLabelCoreIndex = atom->getProp<int>(RLABEL_CORE_INDEX);
398  rLabelCoreIndexToAtomicWt[rLabelCoreIndex] = atom->getAtomicNum();
399  atom->setAtomicNum(0);
400  }
401  }
402 
403  addAtoms(mol, atomsToAdd);
404 
// hydrogen removal without sanitization, with logging blocked while `blocker`
// is in scope; the condition guarding this block is not visible here
// (presumably params.removeHydrogensPostMatch - confirm)
406  RDLog::BlockLogs blocker;
407  bool implicitOnly = false;
408  bool updateExplicitCount = false;
409  bool sanitize = false;
410  MolOps::removeHs(mol, implicitOnly, updateExplicitCount, sanitize);
411  }
412 
413  mol.updatePropertyCache(false); // this was github #1550
414  rgroup.labelled = true;
415 
416  // Restore any core matches that we have set to dummy
417  for (RWMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
418  ++atIt) {
419  Atom *atom = *atIt;
420  if (atom->hasProp(RLABEL_CORE_INDEX)) {
421  // don't need to set IsAromatic on atom - that seems to have been saved
422  atom->setAtomicNum(
423  rLabelCoreIndexToAtomicWt[atom->getProp<int>(RLABEL_CORE_INDEX)]);
424  atom->setNoImplicit(true);
426  }
428  }
429 
430 #ifdef VERBOSE
431  std::cerr << "Relabel Rgroup smiles " << MolToSmiles(mol) << std::endl;
432 #endif
433  }
434 
435  // relabel the core and sidechains using the specified user labels
436  // if matches exist for non labelled atoms, these are added as well
// Rebuilds finalRlabelMapping and every core's labelledCore from the result
// of GetCurrentBestPermutation(), then relabels each R-group in that result.
// NOTE(review): listing line 508 is absent here.
437  void relabel() {
438  std::vector<RGroupMatch> best = GetCurrentBestPermutation();
439 
440  // get the labels used
// note: this local set shadows the member named userLabels
441  std::set<int> userLabels;
442  std::set<int> indexLabels;
443 
444  // Go through all the RGroups and find out which labels were
445  // actually used.
446 
447  // some atoms will have multiple attachment points, i.e. cycles
448  // split these up into new rlabels if necessary
449  // These are detected at match time
450  // This map will hold the extra (new) labels required
451  std::map<int, std::vector<int>> extraAtomRLabels;
452 
453  for (auto &it : best) {
454  for (auto &rgroup : it.rgroups) {
455  if (rgroup.first > 0) {
456  userLabels.insert(rgroup.first);
457  }
458  if (rgroup.first < 0 && !params.onlyMatchAtRGroups) {
459  indexLabels.insert(rgroup.first);
460  }
461 
462  std::map<int, int> rlabelsUsedInRGroup =
463  rgroup.second->getNumBondsToRlabels();
464  for (auto &numBondsUsed : rlabelsUsedInRGroup) {
465  // Make space for the extra labels
// NOTE(review): resize() sets the size outright, so for a label seen in
// several R-groups the last count > 1 visited wins - confirm that a smaller
// later count is meant to shrink the entry
466  if (numBondsUsed.second > 1) { // multiple rgroup bonds to same atom
467  extraAtomRLabels[numBondsUsed.first].resize(numBondsUsed.second -
468  1);
469  }
470  }
471  }
472  }
473 
474  // find user labels that are not present in the decomposition
475  for (auto &core : cores) {
// start again from a fresh copy of the prepared core
476  core.second.labelledCore.reset(new RWMol(*core.second.core));
477  addCoreUserLabels(*core.second.labelledCore, userLabels);
478  }
479 
480  // Assign final RGroup labels to the cores and propagate these to
481  // the scaffold
482  finalRlabelMapping.clear();
483 
484  UsedLabels used_labels;
485  // Add all the user labels now to prevent an index label being assigned to a
486  // user label when multiple cores are present (e.g. the user label is
487  // present in the second core, but not the first).
488  for (auto userLabel : userLabels) {
489  used_labels.add(userLabel);
490  }
// the same mapping and label registry are shared by all cores
491  for (auto &core : cores) {
492  relabelCore(*core.second.labelledCore, finalRlabelMapping, used_labels,
493  indexLabels, extraAtomRLabels);
494  }
495 
496  for (auto &it : best) {
497  for (auto &rgroup : it.rgroups) {
498  relabelRGroup(*rgroup.second, finalRlabelMapping);
499  }
500  }
501 
// sanity check: no two source labels may map to the same final label
502  std::set<int> uniqueMappedValues;
503  std::transform(finalRlabelMapping.cbegin(), finalRlabelMapping.cend(),
504  std::inserter(uniqueMappedValues, uniqueMappedValues.end()),
505  [](const std::pair<int, int> &p) { return p.second; });
506  CHECK_INVARIANT(finalRlabelMapping.size() == uniqueMappedValues.size(),
507  "Error in uniqueness of final RLabel mapping");
509  uniqueMappedValues.size() == userLabels.size() + indexLabels.size(),
510  "Error in final RMapping size");
511  }
512 
// Scores `permutation` with the method selected by params.scoreMethod.
// permutation: the permutation to score (this parameter shadows the member of
//   the same name).
// fingerprintVarianceScoreData: optional data forwarded in the
//   FingerprintVariance case; defaults to nullptr.
// Returns NAN when control reaches the end of the switch without returning.
// NOTE(review): the statements of both cases (listing lines 519 and 522) are
// absent from this listing.
513  double score(const std::vector<size_t> &permutation,
514  FingerprintVarianceScoreData *fingerprintVarianceScoreData =
515  nullptr) const {
516  RGroupScore scoreMethod = static_cast<RGroupScore>(params.scoreMethod);
517  switch (scoreMethod) {
518  case Match:
520  break;
521  case FingerprintVariance:
523  fingerprintVarianceScoreData);
524  break;
525  default:;
526  }
527  return NAN;
528  }
529 
// Body of process(): searches for the best-scoring permutation of matches,
// then optionally prunes and relabels.
// finalize: when true, prune() and relabel() are both run at the end.
// Returns RGroupDecompositionProcessResult(false, -1) when there are no
// matches.
// NOTE(review): the first line of the signature (pruneMatches parameter) and
// listing lines 537, 542, 607, 615-620 and 629 are absent from this listing.
531  bool finalize = false) {
532  if (matches.empty()) {
533  return RGroupDecompositionProcessResult(false, -1);
534  }
535  auto t0 = std::chrono::steady_clock::now();
536  std::unique_ptr<CartesianProduct> iterator;
538 
539  if (params.matchingStrategy == GA) {
// the start time is only handed to the GA when timeout >= 0
540  RGroupGa ga(*this, params.timeout >= 0 ? &t0 : nullptr);
// small search spaces take a different path; that statement is not visible
// here
541  if (ga.numberPermutations() < 100 * ga.getPopsize()) {
543  } else {
544  if (params.gaNumberRuns > 1) {
// keep the scorer of the run with the highest best score
545  auto results = ga.runBatch();
546  auto best = max_element(results.begin(), results.end(),
547  [](const GaResult &a, const GaResult &b) {
548  return a.rGroupScorer.getBestScore() <
549  b.rGroupScorer.getBestScore();
550  });
551  rGroupScorer = best->rGroupScorer;
552  } else {
553  auto result = ga.run();
554  rGroupScorer = result.rGroupScorer;
555  }
556  }
557  }
558  size_t offset = 0;
559  if (params.matchingStrategy != GA) {
560  // Exhaustive search, get the MxN matrix
561  // (M = matches.size(): number of molecules
562  // N = iterator.maxPermutations)
563  std::vector<size_t> permutations;
564 
// when pruning with a non-FingerprintVariance score, only the molecules added
// since the previous call are permuted
565  if (pruneMatches && params.scoreMethod != FingerprintVariance) {
566  offset = previousMatchSize;
567  }
568  previousMatchSize = matches.size();
// permutations[k] = number of candidate matches for molecule k + offset
569  std::transform(
570  matches.begin() + offset, matches.end(),
571  std::back_inserter(permutations),
572  [](const std::vector<RGroupMatch> &m) { return m.size(); });
573  permutation = std::vector<size_t>(permutations.size(), 0);
574 
575  // run through all possible matches and score each
576  // set
577  size_t count = 0;
578 #ifdef DEBUG
579  std::cerr << "Processing" << std::endl;
580 #endif
581  std::unique_ptr<CartesianProduct> it(new CartesianProduct(permutations));
582  iterator = std::move(it);
583  // Iterates through the permutation idx, i.e.
584  // [m1_permutation_idx, m2_permutation_idx, m3_permutation_idx]
585 
586  while (iterator->next()) {
// guard against an iterator that never terminates
587  if (count > iterator->maxPermutations) {
588  throw ValueErrorException("next() did not finish");
589  }
590 #ifdef DEBUG
591  std::cerr << "**************************************************"
592  << std::endl;
593 #endif
594  double newscore = params.scoreMethod == FingerprintVariance
595  ? scoreFromPrunedData(iterator->permutation)
596  : score(iterator->permutation);
597 
// scores within 1e-6 of the current best are stored as ties; a strictly
// better score replaces the best permutation
598  if (fabs(newscore - rGroupScorer.getBestScore()) <
599  1e-6) { // heuristic to overcome floating point comparison issues
600  rGroupScorer.pushTieToStore(iterator->permutation);
601  } else if (newscore > rGroupScorer.getBestScore()) {
602 #ifdef DEBUG
603  std::cerr << " ===> current best:" << newscore << ">"
604  << rGroupScorer.getBestScore() << std::endl;
605 #endif
606  rGroupScorer.setBestPermutation(iterator->permutation, newscore);
608  rGroupScorer.pushTieToStore(iterator->permutation);
609  }
610  ++count;
611  }
612  }
613 
// more than one stored tie: the tie-handling statements are not visible here
614  if (rGroupScorer.tieStoreSize() > 1) {
617  } else {
619  }
621  if (pruneMatches || finalize) {
622  prune();
623  }
624 
625  if (finalize) {
626  relabel();
627  }
628 
630  }
631 };
632 } // namespace RDKit
633 
634 #endif
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:101
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
The class for representing atoms.
Definition: Atom.h:68
void setNoImplicit(bool what)
sets our noImplicit flag, indicating whether or not we are allowed to have implicit Hs
Definition: Atom.h:209
void setAtomicNum(int newNum)
sets our atomic number
Definition: Atom.h:117
void setIsotope(unsigned int what)
sets our isotope number
int getAtomicNum() const
returns our atomic number
Definition: Atom.h:115
void setAtomMapNum(int mapno, bool strict=true)
Set the atom map Number of the atom.
Definition: Atom.h:363
unsigned int getDegree() const
@ SINGLE
Definition: Bond.h:57
void clearProp(const std::string &key) const
clears the value of a property
Definition: RDProps.h:132
void getProp(const std::string &key, T &res) const
allows retrieval of a particular property value
Definition: RDProps.h:102
bool hasProp(const std::string &key) const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: RDProps.h:121
void setProp(const std::string &key, T val, bool computed=false) const
sets a property value
Definition: RDProps.h:72
vector< GaResult > runBatch()
GaResult run(int runNumber=1)
unsigned int numberPermutations() const
Definition: RGroupGa.h:127
void pushTieToStore(const std::vector< size_t > &permutation)
store the passed tied permutation for subsequent processing
void startProcessing()
called when process() starts to initialize State
void setBestPermutation(const std::vector< size_t > &permutation, double score)
set the passed permutation and score as the best one
void breakTies(const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels, const std::unique_ptr< CartesianProduct > &iterator, const std::chrono::steady_clock::time_point &t0, double timeout)
find the best permutation across the tied ones that were stored
void clearTieStore()
clear all stored tied permutations
const std::vector< size_t > & getBestPermutation() const
return the best permutation found so far
Definition: RGroupScore.h:83
double matchScore(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels)
score the passed permutation of matches
size_t tieStoreSize() const
number of stored tied permutations
Definition: RGroupScore.h:99
double getBestScore() const
return the best score found so far
Definition: RGroupScore.h:101
unsigned int getNumConformers() const
Definition: ROMol.h:479
AtomIterator endAtoms()
get an AtomIterator pointing at the end of our Atoms
void updatePropertyCache(bool strict=true)
calculates any of our lazy properties
AtomIterator beginAtoms()
get an AtomIterator pointing at our first Atom
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
unsigned int addAtom(bool updateLabel=true)
adds an empty Atom to our collection
unsigned int addBond(unsigned int beginAtomIdx, unsigned int endAtomIdx, Bond::BondType order=Bond::UNSPECIFIED)
adds a Bond between the indicated Atoms
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
Definition: Exceptions.h:40
static std::string to_string(const Descriptor &desc)
Definition: Descriptor.h:54
RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, unsigned int otherIdx)
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_RDGENERAL_EXPORT const std::string dummyLabel
Std stuff.
Definition: Abbreviations.h:18
@ FingerprintVariance
Definition: RGroupDecomp.h:63
std::map< int, Atom * > getRlabels(const RWMol &mol)
Get the RLabels,atom mapping for the current molecule.
@ Exhaustive
Definition: RGroupDecomp.h:43
const std::string RLABEL_TYPE
const std::string RLABEL_CORE_INDEX
RDKIT_GRAPHMOL_EXPORT void setAtomRLabel(Atom *atm, int rlabel)
const std::string done
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params)
returns canonical SMILES for a molecule
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL
@ MDLRGroup
Definition: RGroupDecomp.h:51
@ AtomMap
Definition: RGroupDecomp.h:49
@ Isotope
Definition: RGroupDecomp.h:50
bool checkForTimeout(const std::chrono::steady_clock::time_point &t0, double timeout, bool throwOnTimeout=true)
Definition: RGroupDecomp.h:207
bool isAnyAtomWithMultipleNeighborsOrNotUserRLabel(const Atom &atom)
Definition: RGroupUtils.h:66
const std::string SIDECHAIN_RLABELS
const unsigned int EMPTY_CORE_LABEL
Definition: RGroupUtils.h:25
RDKIT_RGROUPDECOMPOSITION_EXPORT double fingerprintVarianceScore(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels, FingerprintVarianceScoreData *fingerprintVarianceScoreData=nullptr)
iterate through all possible permutations of the rgroups
Definition: RGroupScore.h:20
void addVarianceData(int matchNumber, int permutationNumber, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels)
void removeVarianceData(int matchNumber, int permutationNumber, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels)
RCore is the core common to a series of molecules.
Definition: RGroupCore.h:24
A single rgroup attached to a given core.
Definition: RGroupData.h:27
boost::shared_ptr< RWMol > combinedMol
Definition: RGroupData.h:28
std::vector< std::vector< RGroupMatch > > matches
FingerprintVarianceScoreData prunedFingerprintVarianceScoreData
RGroupDecompData(const RWMol &inputCore, RGroupDecompositionParameters inputParams)
double score(const std::vector< size_t > &permutation, FingerprintVarianceScoreData *fingerprintVarianceScoreData=nullptr) const
double scoreFromPrunedData(const std::vector< size_t > &permutation, bool reset=true)
void relabelRGroup(RGroupData &rgroup, const std::map< int, int > &mappings)
std::vector< size_t > permutation
std::map< int, std::vector< int > > userLabels
RGroupDecompositionParameters params
std::map< std::string, int > newCores
RGroupDecompositionProcessResult process(bool pruneMatches, bool finalize=false)
std::map< int, RCore > cores
void setRlabel(Atom *atom, int rlabel)
void addAtoms(RWMol &mol, const std::vector< std::pair< Atom *, Atom * >> &atomsToAdd)
std::vector< int > processedRlabels
std::map< int, int > finalRlabelMapping
void addCoreUserLabels(const RWMol &core, std::set< int > &userLabels)
RGroupDecompData(const std::vector< ROMOL_SPTR > &inputCores, RGroupDecompositionParameters inputParams)
void relabelCore(RWMol &core, std::map< int, int > &mappings, UsedLabels &used_labels, const std::set< int > &indexLabels, const std::map< int, std::vector< int >> &extraAtomRLabels)
std::vector< RGroupMatch > GetCurrentBestPermutation() const
bool onlyMatchAtRGroups
only allow rgroup decomposition at the specified rgroups
Definition: RGroupDecomp.h:84
bool removeAllHydrogenRGroups
remove all user-defined rgroups that only have hydrogens
Definition: RGroupDecomp.h:86
double timeout
timeout in seconds. <=0 indicates no timeout
Definition: RGroupDecomp.h:95
bool removeHydrogensPostMatch
remove all hydrogens from the output molecules
Definition: RGroupDecomp.h:91
bool prepareCore(RWMol &, const RWMol *alignCore)