RDKit
Open-source cheminformatics and machine learning.
ChemTransforms.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2006-2012 Greg Landrum
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef _RD_CHEMTRANSFORMS_H__
11 #define _RD_CHEMTRANSFORMS_H__
12 
13 #include <boost/smart_ptr.hpp>
14 #include <vector>
15 #include <iostream>
16 
17 #include "MolFragmenter.h"
18 
19 namespace RDKit{
20  class ROMol;
21  typedef boost::shared_ptr<ROMol> ROMOL_SPTR;
22 
23  //! \brief Returns a copy of an ROMol with the atoms and bonds that
24  //! match a pattern removed.
25  /*!
26  \param mol the ROMol of interest
27  \param query the query ROMol
28  \param onlyFrags if this is set, atoms will only be removed if
29  the entire fragment in which they are found is
30  matched by the query.
31 
32  \return a copy of \c mol with the matching atoms and bonds (if any)
33  removed.
34  */
35  ROMol *deleteSubstructs(const ROMol &mol, const ROMol &query,
36  bool onlyFrags=false);
37 
38  //! \brief Returns a list of copies of an ROMol with the atoms and bonds that
39  //! match a pattern replaced with the atoms contained in another molecule.
40  /*!
41  Bonds are created between the joining atom in the existing molecule
42  and the atoms in the new molecule. So, using SMILES instead of molecules:
43  replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]') ->
44  ['[X]NCCNC(=O)O','OC(=O)NCCN[X]']
45  replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]',true) ->
46  ['[X]NCCN[X]']
47  Chains should be handled "correctly":
48  replaceSubstructs('CC(=O)C','C(=O)','[X]') ->
49  ['C[X]C']
50  As should rings:
51  replaceSubstructs('C1C(=O)C1','C(=O)','[X]') ->
52  ['C1[X]C1']
53  And higher order branches:
54  replaceSubstructs('CC(=O)(C)C','C(=O)','[X]') ->
55  ['C[X](C)C']
56  Note that the client is responsible for making sure that the
57  resulting molecule actually makes sense - this function does not
58  perform sanitization.
59 
60  \param mol the ROMol of interest
61  \param query the query ROMol
62  \param replacement the ROMol to be inserted
63  \param replaceAll if this is true, only a single result, with all occurrences
64  of the substructure replaced, will be returned.
65  \param replacementConnectionPoint index of the atom in the replacement that
66  the bond should be made to
67 
68  \return a vector of pointers to copies of \c mol with the matching atoms
69  and bonds (if any) replaced
70 
71  */
72  std::vector<ROMOL_SPTR> replaceSubstructs(const ROMol &mol, const ROMol &query,
73  const ROMol &replacement,
74  bool replaceAll=false,
75  unsigned int replacementConnectionPoint=0);
76 
77  //! \brief Returns a copy of an ROMol with the atoms and bonds that
78  //! don't fall within a substructure match removed.
79  //!
80  //! dummy atoms are left to indicate attachment points.
81  //!
82  /*!
83  \param mol the ROMol of interest
84  \param coreQuery a query ROMol to be used to match the core
85 
86  \return a copy of \c mol with the non-matching atoms and bonds (if any)
87  removed and dummies at the connection points.
88  */
89  ROMol *replaceSidechains(const ROMol &mol, const ROMol &coreQuery);
90 
91  //! \brief Returns a copy of an ROMol with the atoms and bonds that
92  //! do fall within a substructure match removed.
93  //!
94  //! dummy atoms are left to indicate attachment points.
95  //!
96  /*!
97  Note that this is essentially identical to the replaceSidechains function, except we
98  invert the query and replace the atoms that *do* match the query.
99 
100  \param mol - the ROMol of interest
101  \param coreQuery - a query ROMol to be used to match the core
102  \param replaceDummies - if set, atoms matching dummies in the core will also be replaced
103  \param labelByIndex - if set, the dummy atoms at attachment points are labelled with the
104  index+1 of the corresponding atom in the core
105  \param requireDummyMatch - if set, only side chains that are connected to atoms in
106  the core that have attached dummies will be considered.
107  Molecules that have sidechains that are attached
108  at other points will be rejected (NULL returned).
109 
110  \return a copy of \c mol with the matching atoms and bonds (if any)
111  removed and dummies at the connection points. The client is responsible
112  for deleting this molecule. If the core query is not matched, NULL is returned.
113  */
114  ROMol *replaceCore(const ROMol &mol, const ROMol &coreQuery,
115  bool replaceDummies=true,bool labelByIndex=false,
116  bool requireDummyMatch=false);
117 
118  //! \brief Carries out a Murcko decomposition on the molecule provided
119  //!
120  /*!
121 
122  \param mol - the ROMol of interest
123 
124  \return a new ROMol with the Murcko scaffold
125  The client is responsible for deleting this molecule.
126  */
127  ROMol *MurckoDecompose(const ROMol &mol);
128 
129  //! \brief Combines two molecules to create a new one
130  //!
131  /*!
132 
133  \param mol1 - the first ROMol to be combined
134  \param mol2 - the second ROMol to be combined
135  \param offset - a constant offset to be added to every
136  atom position in mol2
137 
138  \return a new ROMol with the two molecules combined.
139  The new molecule has not been sanitized.
140  The client is responsible for deleting this molecule.
141  */
142  ROMol *combineMols(const ROMol &mol1, const ROMol &mol2,
143  RDGeom::Point3D offset=RDGeom::Point3D(0,0,0));
144 
145  //! \brief Adds named recursive queries to a molecule's atoms based on atom labels
146  //!
147  /*!
148 
149  \param mol - the molecule to be modified
150  \param queries - the dictionary of named queries to add
151  \param propName - the atom property to use to get query names
152  \param reactantLabels - to store pairs of (atom index, query string)
153 
154 
155  NOTES:
156  - existing query information, if present, will be supplemented (AND logic)
157  - non-query atoms will be replaced with query atoms using only the query
158  logic
159  - query names can be present as comma separated lists, they will then
160  be combined using OR logic.
161  - throws a KeyErrorException if a particular query name is not present
162  in \c queries
163 
164  */
165  void addRecursiveQueries(ROMol &mol,const std::map<std::string,ROMOL_SPTR> &queries,std::string propName,
166  std::vector<std::pair<unsigned int, std::string> > *reactantLabels=NULL);
167 
168 
169 
170  //! \brief parses a query definition file and sets up a set of definitions
171  //! suitable for use by addRecursiveQueries()
172  /*!
173 
174  \param filename - the name of the file to be read
175  \param queryDefs - the dictionary of named queries (return value)
176  \param standardize - if true, query names will be converted to lower case
177  \param delimiter - the delimiter separating the columns on each line of the file
178  \param comment - text used to recognize comment lines
179  \param nameColumn - column with the names of queries
180  \param smartsColumn - column with the SMARTS definitions of the queries
181 
182  */
183  void parseQueryDefFile(std::string filename,std::map<std::string,ROMOL_SPTR> &queryDefs,
184  bool standardize=true,std::string delimiter="\t",std::string comment="//",
185  unsigned int nameColumn=0,unsigned int smartsColumn=1);
186  //! \overload
187  void parseQueryDefFile(std::istream *inStream,std::map<std::string,ROMOL_SPTR> &queryDefs,
188  bool standardize=true,std::string delimiter="\t",std::string comment="//",
189  unsigned int nameColumn=0,unsigned int smartsColumn=1);
190  //! \brief equivalent to parseQueryDefFile() but the query definitions are explicitly passed in
191  void parseQueryDefText(const std::string &queryDefText,std::map<std::string,ROMOL_SPTR> &queryDefs,
192  bool standardize=true,std::string delimiter="\t",std::string comment="//",
193  unsigned int nameColumn=0,unsigned int smartsColumn=1);
194 
195 }
196 
197 #endif
198 
199 
200 
201 
void parseQueryDefFile(std::string filename, std::map< std::string, ROMOL_SPTR > &queryDefs, bool standardize=true, std::string delimiter="\t", std::string comment="//", unsigned int nameColumn=0, unsigned int smartsColumn=1)
parses a query definition file and sets up a set of definitions suitable for use by addRecursiveQueri...
ROMol * combineMols(const ROMol &mol1, const ROMol &mol2, RDGeom::Point3D offset=RDGeom::Point3D(0, 0, 0))
Combines two molecules to create a new one.
void addRecursiveQueries(ROMol &mol, const std::map< std::string, ROMOL_SPTR > &queries, std::string propName, std::vector< std::pair< unsigned int, std::string > > *reactantLabels=NULL)
Adds named recursive queries to a molecule's atoms based on atom labels.
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:105
ROMol * replaceCore(const ROMol &mol, const ROMol &coreQuery, bool replaceDummies=true, bool labelByIndex=false, bool requireDummyMatch=false)
Returns a copy of an ROMol with the atoms and bonds that do fall within a substructure match removed...
ROMol * MurckoDecompose(const ROMol &mol)
Carries out a Murcko decomposition on the molecule provided.
ROMol * deleteSubstructs(const ROMol &mol, const ROMol &query, bool onlyFrags=false)
Returns a copy of an ROMol with the atoms and bonds that match a pattern removed. ...
boost::shared_ptr< ROMol > ROMOL_SPTR
void parseQueryDefText(const std::string &queryDefText, std::map< std::string, ROMOL_SPTR > &queryDefs, bool standardize=true, std::string delimiter="\t", std::string comment="//", unsigned int nameColumn=0, unsigned int smartsColumn=1)
equivalent to parseQueryDefFile() but the query definitions are explicitly passed in ...
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
std::vector< ROMOL_SPTR > replaceSubstructs(const ROMol &mol, const ROMol &query, const ROMol &replacement, bool replaceAll=false, unsigned int replacementConnectionPoint=0)
Returns a list of copies of an ROMol with the atoms and bonds that match a pattern replaced with the ...
ROMol * replaceSidechains(const ROMol &mol, const ROMol &coreQuery)
Returns a copy of an ROMol with the atoms and bonds that don't fall within a substructure match remov...