RDKit
Open-source cheminformatics and machine learning.
SLNParseOps.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2008, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior
18 // written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Created by Greg Landrum, September 2006
33 //
34 #include <RDGeneral/export.h>
35 #ifndef __RD_SLNPARSEOPS_H__
36 #define __RD_SLNPARSEOPS_H__
37 
38 #include <vector>
41 #include <GraphMol/RDKitBase.h>
42 #include <GraphMol/RDKitQueries.h>
44 #include <boost/lexical_cast.hpp>
46 
47 namespace RDKit {
48 namespace SLNParse {
49 namespace {
50 //! set a bookmark in the molecule if the atom has an associated ID:
51 void bookmarkAtomID(RWMol *mp, Atom *atom) {
52  PRECONDITION(mp, "bad molecule");
53  PRECONDITION(atom, "bad atom");
54  unsigned int label;
55  if (atom->getPropIfPresent(common_properties::_AtomID, label)) {
56  if (mp->hasAtomBookmark(label)) {
57  std::stringstream err;
58  err << "SLN Parser error: Atom ID " << label << " used a second time.";
59  throw SLNParseException(err.str());
60  }
61  if (mp->hasBondBookmark(label)) {
62  std::stringstream err;
63  err << "SLN Parser error: Atom ID " << label
64  << " appears *after* its ring closure.";
65  throw SLNParseException(err.str());
66  }
67  mp->setAtomBookmark(atom, label);
68  }
69 }
70 
71 //! adds a bond, being careful to handle aromaticity properly
72 template <typename BondType>
73 void addBondToMol(RWMol *mp, BondType *bond) {
74  PRECONDITION(mp, "null molecule");
75  PRECONDITION(bond, "null bond");
76  mp->addBond(bond, true);
77  if (bond->getBondType() == Bond::AROMATIC) {
78  // SLN doesn't have aromatic atom types, aromaticity is a property
79  // of the bonds themselves, so we need to set the atom types:
80  bond->setIsAromatic(true);
81  bond->getBeginAtom()->setIsAromatic(true);
82  bond->getEndAtom()->setIsAromatic(true);
83  }
84 }
85 } // end of anonymous namespace
86 
87 // ------------------------------------------------------------------------------------
88 //! initialize a molecule
89 template <typename AtomType>
90 int startMol(std::vector<RWMol *> &molList, AtomType *firstAtom,
91  bool doingQuery) {
92  PRECONDITION(firstAtom, "empty atom");
93  RWMol *mp = new RWMol();
94  mp->addAtom(firstAtom, true, true);
95  bookmarkAtomID(mp, firstAtom);
96 
97  if (!doingQuery) {
98  // add any hydrogens that are set on the atom, otherwise getting the
99  // numbering right
100  // is just too hard:
101  for (unsigned int i = 0; i < firstAtom->getNumExplicitHs(); ++i) {
102  int hIdx = mp->addAtom(new Atom(1), false, true);
103  mp->addBond(0, hIdx, Bond::SINGLE);
104  }
105  firstAtom->setNumExplicitHs(0);
106  }
107 
108  int sz = molList.size();
109  molList.push_back(mp);
110  return sz;
111 };
112 
113 // ------------------------------------------------------------------------------------
114 //! adds an atom to a molecule
115 template <typename AtomType, typename BondType>
116 void addAtomToMol(std::vector<RWMol *> &molList, unsigned int idx,
117  AtomType *atom, BondType *bond, bool doingQuery) {
118  PRECONDITION(idx < molList.size(), "bad index");
119  RWMol *mp = molList[idx];
120  PRECONDITION(mp, "null molecule");
121  PRECONDITION(atom, "empty atom");
122  PRECONDITION(bond, "null bond");
123 
124  Atom *a1 = mp->getActiveAtom();
125  int atomIdx1 = a1->getIdx();
126  int atomIdx2 = mp->addAtom(atom, true, true);
127  bookmarkAtomID(mp, atom);
128  bond->setOwningMol(mp);
129  bond->setBeginAtomIdx(atomIdx1);
130  bond->setEndAtomIdx(atomIdx2);
131  addBondToMol(mp, bond);
132 
133  if (!doingQuery) {
134  // add any hydrogens that are set on the atom, otherwise getting the
135  // numbering right
136  // is just too hard:
137  for (unsigned int i = 0; i < atom->getNumExplicitHs(); ++i) {
138  int hIdx = mp->addAtom(new Atom(1), false, true);
139  mp->addBond(atomIdx2, hIdx, Bond::SINGLE);
140  }
141  atom->setNumExplicitHs(0);
142  }
143 }
144 //! \overload
145 template <typename AtomType>
146 void addAtomToMol(std::vector<RWMol *> &molList, unsigned int idx,
147  AtomType *atom, bool doingQuery) {
148  addAtomToMol(molList, idx, atom, new Bond(Bond::SINGLE), doingQuery);
149 }
150 
151 // ------------------------------------------------------------------------------------
152 //! closes an indexed ring in a molecule using the bond provided
153 // The bond is formed from the atom in the molecule with the
154 // corresponding bookmark to the active atom
155 //
156 template <typename BondType>
157 void closeRingBond(std::vector<RWMol *> &molList, unsigned int molIdx,
158  unsigned int ringIdx, BondType *bond,
159  bool postponeAllowed = true) {
160  PRECONDITION(molIdx < molList.size(), "bad index");
161  RWMol *mp = molList[molIdx];
162  PRECONDITION(mp, "null molecule");
163  PRECONDITION(bond, "Null bond");
164 
165  if (!mp->hasAtomBookmark(ringIdx)) {
166  if (postponeAllowed) {
167  // save it for later:
168  bond->setOwningMol(mp);
169  bond->setEndAtomIdx(mp->getActiveAtom()->getIdx());
170  mp->setBondBookmark(bond, ringIdx);
171  return;
172  } else {
173  std::stringstream err;
174  err << "SLN Parser error: Ring closure " << ringIdx
175  << " does not have a corresponding opener.";
176  throw SLNParseException(err.str());
177  }
178  }
179  Atom *opener = mp->getAtomWithBookmark(ringIdx);
180  CHECK_INVARIANT(opener, "invalid atom");
181 
182  Atom *closer = mp->getActiveAtom();
183  bond->setOwningMol(mp);
184  bond->setBeginAtom(opener);
185  bond->setEndAtom(closer);
186  addBondToMol(mp, bond);
187 };
188 //! \overload
189 void closeRingBond(std::vector<RWMol *> &molList, unsigned int molIdx,
190  unsigned int ringIdx) {
191  closeRingBond(molList, molIdx, ringIdx, new Bond(Bond::SINGLE));
192 };
193 
194 // ------------------------------------------------------------------------------------
195 // NOTE: this takes over responsibility for the bond
196 template <typename BondType>
197 int addBranchToMol(std::vector<RWMol *> &molList, unsigned int molIdx,
198  unsigned int branchIdx, BondType *&bond) {
199  PRECONDITION(molIdx < molList.size(), "bad index");
200  RWMol *mp = molList[molIdx];
201  PRECONDITION(mp, "null molecule");
202  PRECONDITION(branchIdx < molList.size(), "bad index");
203  RWMol *branch = molList[branchIdx];
204  PRECONDITION(branch, "null branch");
205  PRECONDITION(bond, "null bond");
206 
207  unsigned int activeAtomIdx = mp->getActiveAtom()->getIdx();
208  unsigned int nOrigAtoms = mp->getNumAtoms();
209 
210  //
211  // Add the fragment's atoms and bonds to the molecule:
212  //
213  mp->insertMol(*branch);
214 
215  // copy in any atom bookmarks from the branch:
216  for (ROMol::ATOM_BOOKMARK_MAP::const_iterator bmIt =
217  branch->getAtomBookmarks()->begin();
218  bmIt != branch->getAtomBookmarks()->end(); ++bmIt) {
219  if (bmIt->first < 0) continue;
220  if (mp->hasAtomBookmark(bmIt->first)) {
221  std::stringstream err;
222  err << "SLN Parser error: Atom ID " << bmIt->first
223  << " used a second time.";
224  throw SLNParseException(err.str());
225  } else if (mp->hasBondBookmark(bmIt->first)) {
226  std::stringstream err;
227  err << "SLN Parser error: Atom ID " << bmIt->first
228  << " appears *after* its ring closure.";
229  throw SLNParseException(err.str());
230  } else {
231  CHECK_INVARIANT(bmIt->second.size() == 1,
232  "bad atom bookmark list on branch");
233  Atom *tgtAtom =
234  mp->getAtomWithIdx((*bmIt->second.begin())->getIdx() + nOrigAtoms);
235  mp->setAtomBookmark(tgtAtom, bmIt->first);
236  }
237  }
238 
239  // loop over bond bookmarks in the branch and close the corresponding rings
240  for (ROMol::BOND_BOOKMARK_MAP::const_iterator bmIt =
241  branch->getBondBookmarks()->begin();
242  bmIt != branch->getBondBookmarks()->end(); ++bmIt) {
243  CHECK_INVARIANT(bmIt->second.size() >= 1,
244  "bad bond bookmark list on branch");
245  for (ROMol::BOND_PTR_LIST::const_iterator bondIt = bmIt->second.begin();
246  bondIt != bmIt->second.end(); ++bondIt) {
247  Bond *tgtBond = *bondIt;
248  if (bmIt->first > 0 && mp->hasAtomBookmark(bmIt->first)) {
249  Atom *tmpAtom = mp->getActiveAtom();
250  mp->setActiveAtom(
251  mp->getAtomWithIdx(tgtBond->getEndAtomIdx() + nOrigAtoms));
252  closeRingBond(molList, molIdx, bmIt->first, tgtBond, false);
253  mp->setActiveAtom(tmpAtom);
254  } else {
255  // no partner found yet, copy into this mol:
256  tgtBond->setOwningMol(mp);
257  tgtBond->setEndAtomIdx(tgtBond->getEndAtomIdx() + nOrigAtoms);
258  mp->setBondBookmark(tgtBond, bmIt->first);
259  }
260  }
261  }
262 
263  // set the connecting bond:
264  if (bond->getBondType() != Bond::IONIC) {
265  bond->setOwningMol(mp);
266  bond->setBeginAtomIdx(activeAtomIdx);
267  bond->setEndAtomIdx(nOrigAtoms);
268  addBondToMol(mp, bond);
269  } else {
270  delete bond;
271  }
272  bond = 0;
273 
274  delete branch;
275  unsigned int sz = molList.size();
276  if (sz == branchIdx + 1) {
277  molList.resize(sz - 1);
278  }
279  return molIdx;
280 };
281 //! \overload
282 int addBranchToMol(std::vector<RWMol *> &molList, unsigned int molIdx,
283  unsigned int branchIdx) {
284  Bond *newBond = new Bond(Bond::SINGLE);
285  return addBranchToMol(molList, molIdx, branchIdx, newBond);
286 };
287 
288 // ------------------------------------------------------------------------------------
289 //! adds the atoms and bonds from a fragment to the molecule, sets no bond
290 // between them
291 int addFragToMol(std::vector<RWMol *> &molList, unsigned int molIdx,
292  unsigned int fragIdx) {
293  Bond *newBond = new Bond(Bond::IONIC);
294  return addBranchToMol(molList, molIdx, fragIdx, newBond);
295 }
296 
297 //! convenience function to convert the argument to a string
298 template <typename T>
299 std::string convertToString(T val) {
300  std::string res = boost::lexical_cast<std::string>(val);
301  return res;
302 }
303 
304 void CleanupAfterParseError(RWMol *mol) {
305  PRECONDITION(mol, "no molecule");
306  // blow out any partial bonds:
307  RWMol::BOND_BOOKMARK_MAP *marks = mol->getBondBookmarks();
308  RWMol::BOND_BOOKMARK_MAP::iterator markI = marks->begin();
309  while (markI != marks->end()) {
310  RWMol::BOND_PTR_LIST &bonds = markI->second;
311  for (RWMol::BOND_PTR_LIST::iterator bondIt = bonds.begin();
312  bondIt != bonds.end(); ++bondIt) {
313  delete *bondIt;
314  }
315  ++markI;
316  }
317 }
318 } // end of namespace SLNParse
319 } // end of namespace RDKit
320 #endif
std::string convertToString(T val)
convenience function to convert the argument to a string
Definition: SLNParseOps.h:299
void addAtomToMol(std::vector< RWMol *> &molList, unsigned int idx, AtomType *atom, BondType *bond, bool doingQuery)
adds an atom to a molecule
Definition: SLNParseOps.h:116
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
unsigned int addAtom(bool updateLabel=true)
adds an empty Atom to our collection
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:100
pulls in the RDKit Query functionality
pulls in the core RDKit functionality
int addBranchToMol(std::vector< RWMol *> &molList, unsigned int molIdx, unsigned int branchIdx, BondType *&bond)
Definition: SLNParseOps.h:197
int startMol(std::vector< RWMol *> &molList, AtomType *firstAtom, bool doingQuery)
initialize a molecule
Definition: SLNParseOps.h:90
unsigned int getIdx() const
returns our index within the ROMol
Definition: Atom.h:129
unsigned int getEndAtomIdx() const
returns the index of our end Atom
Definition: Bond.h:182
Std stuff.
Definition: Atom.h:30
class for representing a bond
Definition: Bond.h:47
void setOwningMol(ROMol *other)
sets our owning molecule
void setOwningMol(ROMol *other)
sets our owning molecule
void CleanupAfterParseError(RWMol *mol)
Definition: SLNParseOps.h:304
void setEndAtomIdx(unsigned int what)
sets the index of our end Atom
#define PRECONDITION(expr, mess)
Definition: Invariant.h:108
RDKIT_RDGENERAL_EXPORT const std::string _AtomID
int addFragToMol(std::vector< RWMol *> &molList, unsigned int molIdx, unsigned int fragIdx)
adds the atoms and bonds from a fragment to the molecule, sets no bond
Definition: SLNParseOps.h:291
The class for representing atoms.
Definition: Atom.h:69
void closeRingBond(std::vector< RWMol *> &molList, unsigned int molIdx, unsigned int ringIdx, BondType *bond, bool postponeAllowed=true)
closes an indexed ring in a molecule using the bond provided
Definition: SLNParseOps.h:157
unsigned int addBond(unsigned int beginAtomIdx, unsigned int endAtomIdx, Bond::BondType order=Bond::UNSPECIFIED)
adds a Bond between the indicated Atoms