RDKit
Open-source cheminformatics and machine learning.
SLNParseOps.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2008, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior
18 // written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Created by Greg Landrum, September 2006
33 //
34 #ifndef __RD_SLNPARSEOPS_H__
35 #define __RD_SLNPARSEOPS_H__
36 
37 #include <vector>
40 #include <GraphMol/RDKitBase.h>
41 #include <GraphMol/RDKitQueries.h>
42 #include <boost/lexical_cast.hpp>
43 
44 namespace RDKit{
45  namespace SLNParse{
46  namespace {
47  //! set a bookmark in the molecule if the atom has an associated ID:
48  void bookmarkAtomID(RWMol *mp,Atom *atom){
49  PRECONDITION(mp,"bad molecule");
50  PRECONDITION(atom,"bad atom");
51  unsigned int label;
52  if(atom->getPropIfPresent(common_properties::_AtomID, label)){
53  if(mp->hasAtomBookmark(label)){
54  std::stringstream err;
55  err << "SLN Parser error: Atom ID " << label << " used a second time.";
56  throw SLNParseException(err.str());
57  }
58  if(mp->hasBondBookmark(label)){
59  std::stringstream err;
60  err << "SLN Parser error: Atom ID " << label << " appears *after* its ring closure.";
61  throw SLNParseException(err.str());
62  }
63  mp->setAtomBookmark(atom,label);
64  }
65  }
66 
67  //! adds a bond, being careful to handle aromaticity properly
68  template<typename BondType>
69  void addBondToMol(RWMol *mp,BondType *bond){
70  PRECONDITION(mp,"null molecule");
71  PRECONDITION(bond,"null bond");
72  mp->addBond(bond,true);
73  if(bond->getBondType()==Bond::AROMATIC){
74  // SLN doesn't have aromatic atom types, aromaticity is a property
75  // of the bonds themselves, so we need to set the atom types:
76  bond->setIsAromatic(true);
77  bond->getBeginAtom()->setIsAromatic(true);
78  bond->getEndAtom()->setIsAromatic(true);
79  }
80  }
81  }// end of anonymous namespace
82 
83  // ------------------------------------------------------------------------------------
84  //! initialize a molecule
85  template <typename AtomType>
86  int startMol(std::vector<RWMol *> &molList,AtomType *firstAtom,bool doingQuery){
87  PRECONDITION(firstAtom,"empty atom");
88  RWMol *mp = new RWMol();
89  mp->addAtom(firstAtom,true,true);
90  bookmarkAtomID(mp,firstAtom);
91 
92  if(!doingQuery){
93  // add any hydrogens that are set on the atom, otherwise getting the numbering right
94  // is just too hard:
95  for(unsigned int i=0;i<firstAtom->getNumExplicitHs();++i){
96  int hIdx=mp->addAtom(new Atom(1),false,true);
97  mp->addBond(0,hIdx,Bond::SINGLE);
98  }
99  firstAtom->setNumExplicitHs(0);
100  }
101 
102  int sz = molList.size();
103  molList.push_back(mp);
104  return sz;
105  };
106 
107  // ------------------------------------------------------------------------------------
108  //! adds an atom to a molecule
109  template<typename AtomType,typename BondType>
110  void addAtomToMol(std::vector<RWMol *> &molList,unsigned int idx,AtomType *atom,
111  BondType *bond,bool doingQuery){
112  PRECONDITION(idx<molList.size(),"bad index");
113  RWMol *mp=molList[idx];
114  PRECONDITION(mp,"null molecule");
115  PRECONDITION(atom,"empty atom");
116  PRECONDITION(bond,"null bond");
117 
118  Atom *a1 = mp->getActiveAtom();
119  int atomIdx1=a1->getIdx();
120  int atomIdx2=mp->addAtom(atom,true,true);
121  bookmarkAtomID(mp,atom);
122  bond->setOwningMol(mp);
123  bond->setBeginAtomIdx(atomIdx1);
124  bond->setEndAtomIdx(atomIdx2);
125  addBondToMol(mp,bond);
126 
127  if(!doingQuery){
128  // add any hydrogens that are set on the atom, otherwise getting the numbering right
129  // is just too hard:
130  for(unsigned int i=0;i<atom->getNumExplicitHs();++i){
131  int hIdx=mp->addAtom(new Atom(1),false,true);
132  mp->addBond(atomIdx2,hIdx,Bond::SINGLE);
133  }
134  atom->setNumExplicitHs(0);
135  }
136  }
137  //! \overload
138  template<typename AtomType>
139  void addAtomToMol(std::vector<RWMol *> &molList,unsigned int idx,AtomType *atom,bool doingQuery){
140  addAtomToMol(molList,idx,atom,new Bond(Bond::SINGLE),doingQuery);
141  }
142 
143  // ------------------------------------------------------------------------------------
144  //! closes an indexed ring in a molecule using the bond provided
145  // The bond is formed from the atom in the molecule with the
146  // corresponding bookmark to the active atom
147  //
148  template <typename BondType>
149  void closeRingBond(std::vector<RWMol *> &molList,unsigned int molIdx,
150  unsigned int ringIdx,BondType *bond,
151  bool postponeAllowed=true){
152  PRECONDITION(molIdx<molList.size(),"bad index");
153  RWMol *mp=molList[molIdx];
154  PRECONDITION(mp,"null molecule");
155  PRECONDITION(bond,"Null bond");
156 
157  if(!mp->hasAtomBookmark(ringIdx)){
158  if(postponeAllowed){
159  // save it for later:
160  bond->setOwningMol(mp);
161  bond->setEndAtomIdx(mp->getActiveAtom()->getIdx());
162  mp->setBondBookmark(bond,ringIdx);
163  return;
164  } else {
165  std::stringstream err;
166  err << "SLN Parser error: Ring closure " << ringIdx << " does not have a corresponding opener.";
167  throw SLNParseException(err.str());
168  }
169  }
170  Atom *opener=mp->getAtomWithBookmark(ringIdx);
171  CHECK_INVARIANT(opener,"invalid atom");
172 
173  Atom *closer=mp->getActiveAtom();
174  bond->setOwningMol(mp);
175  bond->setBeginAtom(opener);
176  bond->setEndAtom(closer);
177  addBondToMol(mp,bond);
178  };
179  //! \overload
180  void closeRingBond(std::vector<RWMol *> &molList,unsigned int molIdx,unsigned int ringIdx){
181  closeRingBond(molList,molIdx,ringIdx,new Bond(Bond::SINGLE));
182  };
183 
184  // ------------------------------------------------------------------------------------
185  // NOTE: this takes over responsibility for the bond
186  template <typename BondType>
187  int addBranchToMol(std::vector<RWMol *> &molList,unsigned int molIdx,
188  unsigned int branchIdx,BondType *&bond){
189  PRECONDITION(molIdx<molList.size(),"bad index");
190  RWMol *mp=molList[molIdx];
191  PRECONDITION(mp,"null molecule");
192  PRECONDITION(branchIdx<molList.size(),"bad index");
193  RWMol *branch=molList[branchIdx];
194  PRECONDITION(branch,"null branch");
195  PRECONDITION(bond,"null bond");
196 
197  unsigned int activeAtomIdx=mp->getActiveAtom()->getIdx();
198  unsigned int nOrigAtoms=mp->getNumAtoms();
199 
200  //
201  // Add the fragment's atoms and bonds to the molecule:
202  //
203  mp->insertMol(*branch);
204 
205  // copy in any atom bookmarks from the branch:
206  for(ROMol::ATOM_BOOKMARK_MAP::const_iterator bmIt=branch->getAtomBookmarks()->begin();
207  bmIt != branch->getAtomBookmarks()->end();++bmIt){
208  if(bmIt->first<0) continue;
209  if(mp->hasAtomBookmark(bmIt->first)){
210  std::stringstream err;
211  err << "SLN Parser error: Atom ID " << bmIt->first << " used a second time.";
212  throw SLNParseException(err.str());
213  } else if(mp->hasBondBookmark(bmIt->first)){
214  std::stringstream err;
215  err << "SLN Parser error: Atom ID " << bmIt->first << " appears *after* its ring closure.";
216  throw SLNParseException(err.str());
217  }
218  else {
219  CHECK_INVARIANT(bmIt->second.size()==1,"bad atom bookmark list on branch");
220  Atom *tgtAtom=mp->getAtomWithIdx((*bmIt->second.begin())->getIdx()+nOrigAtoms);
221  mp->setAtomBookmark(tgtAtom,bmIt->first);
222  }
223  }
224 
225  // loop over bond bookmarks in the branch and close the corresponding rings
226  for(ROMol::BOND_BOOKMARK_MAP::const_iterator bmIt=branch->getBondBookmarks()->begin();
227  bmIt != branch->getBondBookmarks()->end();++bmIt){
228  CHECK_INVARIANT(bmIt->second.size()>=1,"bad bond bookmark list on branch");
229  for(ROMol::BOND_PTR_LIST::const_iterator bondIt=bmIt->second.begin();
230  bondIt!=bmIt->second.end();++bondIt){
231  Bond *tgtBond=*bondIt;
232  if(bmIt->first>0 && mp->hasAtomBookmark(bmIt->first)){
233  Atom *tmpAtom=mp->getActiveAtom();
234  mp->setActiveAtom(mp->getAtomWithIdx(tgtBond->getEndAtomIdx()+nOrigAtoms));
235  closeRingBond(molList,molIdx,bmIt->first,tgtBond,false);
236  mp->setActiveAtom(tmpAtom);
237  } else {
238  // no partner found yet, copy into this mol:
239  tgtBond->setOwningMol(mp);
240  tgtBond->setEndAtomIdx(tgtBond->getEndAtomIdx()+nOrigAtoms);
241  mp->setBondBookmark(tgtBond,bmIt->first);
242  }
243  }
244  }
245 
246  // set the connecting bond:
247  if(bond->getBondType()!=Bond::IONIC){
248  bond->setOwningMol(mp);
249  bond->setBeginAtomIdx(activeAtomIdx);
250  bond->setEndAtomIdx(nOrigAtoms);
251  addBondToMol(mp,bond);
252  } else {
253  delete bond;
254  }
255  bond=0;
256 
257  delete branch;
258  unsigned int sz = molList.size();
259  if ( sz==branchIdx+1) {
260  molList.resize( sz-1 );
261  }
262  return molIdx;
263  };
264  //! \overload
265  int addBranchToMol(std::vector<RWMol *> &molList,unsigned int molIdx,unsigned int branchIdx){
266  Bond *newBond=new Bond(Bond::SINGLE);
267  return addBranchToMol(molList,molIdx,branchIdx,newBond);
268  };
269 
270  // ------------------------------------------------------------------------------------
271  //! adds the atoms and bonds from a fragment to the molecule, sets no bond between them
272  int addFragToMol(std::vector<RWMol *> &molList,unsigned int molIdx,unsigned int fragIdx){
273  Bond *newBond=new Bond(Bond::IONIC);
274  return addBranchToMol(molList,molIdx,fragIdx,newBond);
275  }
276 
277  //! convenience function to convert the argument to a string
278  template <typename T>
279  std::string convertToString(T val){
280  std::string res=boost::lexical_cast<std::string>(val);
281  return res;
282  }
283 
284  void CleanupAfterParseError(RWMol *mol){
285  PRECONDITION(mol,"no molecule");
286  // blow out any partial bonds:
287  RWMol::BOND_BOOKMARK_MAP *marks = mol->getBondBookmarks();
288  RWMol::BOND_BOOKMARK_MAP::iterator markI=marks->begin();
289  while(markI != marks->end()){
290  RWMol::BOND_PTR_LIST &bonds=markI->second;
291  for(RWMol::BOND_PTR_LIST::iterator bondIt=bonds.begin();
292  bondIt!=bonds.end();++bondIt){
293  delete *bondIt;
294  }
295  ++markI;
296  }
297  }
298  } // end of namespace SLNParse
299 } // end of namespace RDKit
300 #endif
std::string convertToString(T val)
convenience function to convert the argument to a string
Definition: SLNParseOps.h:279
int addBranchToMol(std::vector< RWMol * > &molList, unsigned int molIdx, unsigned int branchIdx, BondType *&bond)
Definition: SLNParseOps.h:187
int addFragToMol(std::vector< RWMol * > &molList, unsigned int molIdx, unsigned int fragIdx)
adds the atoms and bonds from a fragment to the molecule, sets no bond between them ...
Definition: SLNParseOps.h:272
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:30
unsigned int addAtom(bool updateLabel=true)
adds an empty Atom to our collection
void closeRingBond(std::vector< RWMol * > &molList, unsigned int molIdx, unsigned int ringIdx, BondType *bond, bool postponeAllowed=true)
closes an indexed ring in a molecule using the bond provided
Definition: SLNParseOps.h:149
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:114
pulls in the RDKit Query functionality
unsigned int getIdx() const
returns our index within the ROMol
Definition: Atom.h:124
pulls in the core RDKit functionality
int startMol(std::vector< RWMol * > &molList, AtomType *firstAtom, bool doingQuery)
initialize a molecule
Definition: SLNParseOps.h:86
void addAtomToMol(std::vector< RWMol * > &molList, unsigned int idx, AtomType *atom, BondType *bond, bool doingQuery)
adds an atom to a molecule
Definition: SLNParseOps.h:110
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
unsigned int getEndAtomIdx() const
returns the index of our end Atom
Definition: Bond.h:177
class for representing a bond
Definition: Bond.h:46
void setOwningMol(ROMol *other)
sets our owning molecule
void setOwningMol(ROMol *other)
sets our owning molecule
void CleanupAfterParseError(RWMol *mol)
Definition: SLNParseOps.h:284
void setEndAtomIdx(unsigned int what)
sets the index of our end Atom
const std::string _AtomID
#define PRECONDITION(expr, mess)
Definition: Invariant.h:119
The class for representing atoms.
Definition: Atom.h:67
unsigned int addBond(unsigned int beginAtomIdx, unsigned int endAtomIdx, Bond::BondType order=Bond::UNSPECIFIED)
adds a Bond between the indicated Atoms