RDKit
Open-source cheminformatics and machine learning.
ChemTransforms.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2006-2012 Greg Landrum
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef _RD_CHEMTRANSFORMS_H__
12 #define _RD_CHEMTRANSFORMS_H__
13 
14 #include <boost/smart_ptr.hpp>
15 #include <vector>
16 #include <iostream>
17 
19 #include "MolFragmenter.h"
20 
21 namespace RDKit {
22 class ROMol;
23 typedef boost::shared_ptr<ROMol> ROMOL_SPTR;
24 
25 //! \brief Returns a copy of an ROMol with the atoms and bonds that
26 //! match a pattern removed.
27 /*!
28  \param mol the ROMol of interest
29  \param query the query ROMol
30  \param onlyFrags if this is set, atoms will only be removed if
31  the entire fragment in which they are found is
32  matched by the query.
33  \param useChirality - if set, match the query using chirality
34 
35  \return a copy of \c mol with the matching atoms and bonds (if any)
36  removed.
37 */
39  bool onlyFrags = false, bool useChirality = false);
40 
41 //! \brief Returns a list of copies of an ROMol with the atoms and bonds that
42 //! match a pattern replaced with the atoms contained in another molecule.
43 /*!
44  Bonds are created between the joining atom in the existing molecule
45  and the atoms in the new molecule. So, using SMILES instead of molecules:
46  replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]') ->
47  ['[X]NCCNC(=O)O','OC(=O)NCCN[X]']
48  replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]',true) ->
49  ['[X]NCCN[X]']
50  Chains should be handled "correctly":
51  replaceSubstructs('CC(=O)C','C(=O)','[X]') ->
52  ['C[X]C']
53  As should rings:
54  replaceSubstructs('C1C(=O)C1','C(=O)','[X]') ->
55  ['C1[X]C1']
56  And higher order branches:
57  replaceSubstructs('CC(=O)(C)C','C(=O)','[X]') ->
58  ['C[X](C)C']
59  Note that the client is responsible for making sure that the
60  resulting molecule actually makes sense - this function does not
61  perform sanitization.
62 
63  \param mol the ROMol of interest
64  \param query the query ROMol
65  \param replacement the ROMol to be inserted
66  \param replaceAll if this is true, only a single result, with all
67  occurrences
68  of the substructure replaced, will be returned.
69  \param replacementConnectionPoint index of the atom in the replacement
70  that
71  the bond should be made to
72  \param useChirality - if set, match the query using chirality
73 
74 
75  \return a vector of pointers to copies of \c mol with the matching atoms
76  and bonds (if any) replaced
77 
78 */
79 RDKIT_CHEMTRANSFORMS_EXPORT std::vector<ROMOL_SPTR> replaceSubstructs(
80  const ROMol &mol, const ROMol &query, const ROMol &replacement,
81  bool replaceAll = false, unsigned int replacementConnectionPoint = 0,
82  bool useChirality = false);
83 
84 //! \brief Returns a copy of an ROMol with the atoms and bonds that
85 //! don't fall within a substructure match removed.
86 //!
87 //! dummy atoms are left to indicate attachment points.
88 //!
89 /*!
90  \param mol the ROMol of interest
91  \param coreQuery a query ROMol to be used to match the core
92  \param useChirality - if set, match the coreQuery using chirality
93 
94  \return a copy of \c mol with the non-matching atoms and bonds (if any)
95  removed and dummies at the connection points.
96 */
97 
98 
99 RDKIT_CHEMTRANSFORMS_EXPORT ROMol *replaceSidechains(const ROMol &mol, const ROMol &coreQuery,
100  bool useChirality = false);
101 
102 //! \brief Returns a copy of an ROMol with the atoms and bonds that
103 //! are referenced by the MatchVector removed.
104 //! MatchVector must be defined between mol and the specified core.
105 //!
106 //! dummy atoms are left to indicate attachment points.
107 //! These dummy atoms can be labeled either by the matching index
108 //! in the query or by an arbitrary "first match" found.
109 //! Additional matching options are given below.
110 //!
111 /*!
112  Note that this is essentially identical to the replaceSidechains function,
113  except we
114  invert the query and replace the atoms that *do* match the query.
115 
116  \param mol - the ROMol of interest
117  \param core - the core being matched against
118  \param matchVect - a matchVect of the type returned by Substructure Matching
119  \param replaceDummies - if set, atoms matching dummies in the core will also
120  be replaced
121  \param labelByIndex - if set, the dummy atoms at attachment points are
122  labelled with the
123  index+1 of the corresponding atom in the core
124  \param requireDummyMatch - if set, only side chains that are connected to
125  atoms in
126  the core that have attached dummies will be
127  considered.
128  Molecules that have sidechains that are attached
129  at other points will be rejected (NULL returned).
130  Note: this overload takes no useChirality argument; the match is used as given in \c matchVect.
131 
132  \return a copy of \c mol with the matched atoms and bonds (if any)
133  removed and dummies at the connection points. The client is
134  responsible
135  for deleting this molecule. If the core query is not matched, NULL
136  is returned.
137 */
138 RDKIT_CHEMTRANSFORMS_EXPORT ROMol *replaceCore(const ROMol &mol, const ROMol &core,
139  const MatchVectType &matchVect,
140  bool replaceDummies = true,
141  bool labelByIndex = false,
142  bool requireDummyMatch = false);
143 
144 //! \brief Returns a copy of an ROMol with the atoms and bonds that
145 //! do fall within a substructure match removed.
146 //!
147 //! dummy atoms are left to indicate attachment points.
148 //!
149 /*!
150  Note that this is essentially identical to the replaceSidechains function,
151  except we
152  invert the query and replace the atoms that *do* match the query.
153 
154  \param mol - the ROMol of interest
155  \param coreQuery - a query ROMol to be used to match the core
156  \param replaceDummies - if set, atoms matching dummies in the core will also
157  be replaced
158  \param labelByIndex - if set, the dummy atoms at attachment points are
159  labelled with the
160  index+1 of the corresponding atom in the core
161  \param requireDummyMatch - if set, only side chains that are connected to
162  atoms in
163  the core that have attached dummies will be
164  considered.
165  Molecules that have sidechains that are attached
166  at other points will be rejected (NULL returned).
167  \param useChirality - if set, match the coreQuery using chirality
168 
169  \return a copy of \c mol with the matching atoms and bonds (if any)
170  removed and dummies at the connection points. The client is
171  responsible
172  for deleting this molecule. If the core query is not matched, NULL
173  is returned.
174 */
175 RDKIT_CHEMTRANSFORMS_EXPORT ROMol *replaceCore(const ROMol &mol, const ROMol &coreQuery,
176  bool replaceDummies = true, bool labelByIndex = false,
177  bool requireDummyMatch = false, bool useChirality = false);
178 
179 //! \brief Carries out a Murcko decomposition on the molecule provided
180 //!
181 /*!
182 
183  \param mol - the ROMol of interest
184 
185  \return a new ROMol with the Murcko scaffold
186  The client is responsible for deleting this molecule.
187 */
189 
190 //! \brief Combines two molecules to create a new one
191 //!
192 /*!
193 
194  \param mol1 - the first ROMol to be combined
195  \param mol2 - the second ROMol to be combined
196  \param offset - a constant offset to be added to every
197  atom position in mol2
198 
199  \return a new ROMol with the two molecules combined.
200  The new molecule has not been sanitized.
201  The client is responsible for deleting this molecule.
202 */
203 RDKIT_CHEMTRANSFORMS_EXPORT ROMol *combineMols(const ROMol &mol1, const ROMol &mol2,
204  RDGeom::Point3D offset = RDGeom::Point3D(0, 0, 0));
205 
206 //! \brief Adds named recursive queries to a molecule's atoms based on atom
207 //! labels
208 //!
209 /*!
210 
211  \param mol - the molecule to be modified
212  \param queries - the dictionary of named queries to add
213  \param propName - the atom property to use to get query names
214  \param reactantLabels - to store pairs of (atom index, query string)
215 
216 
217  NOTES:
218  - existing query information, if present, will be supplemented (AND logic)
219  - non-query atoms will be replaced with query atoms using only the query
220  logic
221  - query names can be present as comma separated lists, they will then
222  be combined using OR logic.
223  - throws a KeyErrorException if a particular query name is not present
224  in \c queries
225 
226 */
228  ROMol &mol, const std::map<std::string, ROMOL_SPTR> &queries,
229  const std::string &propName,
230  std::vector<std::pair<unsigned int, std::string> > *reactantLabels = NULL);
231 
232 //! \brief parses a query definition file and sets up a set of definitions
233 //! suitable for use by addRecursiveQueries()
234 /*!
235 
236  \param filename - the name of the file to be read
237  \param queryDefs - the dictionary of named queries (return value)
238  \param standardize - if true, query names will be converted to lower
239  case
240  \param delimiter - the delimiter separating the columns within a line
241  \param comment - text used to recognize comment lines
242  \param nameColumn - column with the names of queries
243  \param smartsColumn - column with the SMARTS definitions of the queries
244 
245 */
246 RDKIT_CHEMTRANSFORMS_EXPORT void parseQueryDefFile(const std::string &filename,
247  std::map<std::string, ROMOL_SPTR> &queryDefs,
248  bool standardize = true,
249  const std::string &delimiter = "\t",
250  const std::string &comment = "//",
251  unsigned int nameColumn = 0,
252  unsigned int smartsColumn = 1);
253 //! \overload
254 RDKIT_CHEMTRANSFORMS_EXPORT void parseQueryDefFile(std::istream *inStream,
255  std::map<std::string, ROMOL_SPTR> &queryDefs,
256  bool standardize = true,
257  const std::string &delimiter = "\t",
258  const std::string &comment = "//",
259  unsigned int nameColumn = 0,
260  unsigned int smartsColumn = 1);
261 //! \brief equivalent to parseQueryDefFile() but the query definitions are
262 //! explicitly passed in
263 RDKIT_CHEMTRANSFORMS_EXPORT void parseQueryDefText(const std::string &queryDefText,
264  std::map<std::string, ROMOL_SPTR> &queryDefs,
265  bool standardize = true,
266  const std::string &delimiter = "\t",
267  const std::string &comment = "//",
268  unsigned int nameColumn = 0,
269  unsigned int smartsColumn = 1);
270 }
271 
272 #endif
RDKIT_CHEMTRANSFORMS_EXPORT void addRecursiveQueries(ROMol &mol, const std::map< std::string, ROMOL_SPTR > &queries, const std::string &propName, std::vector< std::pair< unsigned int, std::string > > *reactantLabels=NULL)
Adds named recursive queries to a molecule's atoms based on atom labels.
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * MurckoDecompose(const ROMol &mol)
Carries out a Murcko decomposition on the molecule provided.
RDKIT_CHEMTRANSFORMS_EXPORT std::vector< ROMOL_SPTR > replaceSubstructs(const ROMol &mol, const ROMol &query, const ROMol &replacement, bool replaceAll=false, unsigned int replacementConnectionPoint=0, bool useChirality=false)
Returns a list of copies of an ROMol with the atoms and bonds that match a pattern replaced with the ...
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx) ...
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * replaceCore(const ROMol &mol, const ROMol &core, const MatchVectType &matchVect, bool replaceDummies=true, bool labelByIndex=false, bool requireDummyMatch=false)
Returns a copy of an ROMol with the atoms and bonds that are referenced by the MatchVector removed...
boost::shared_ptr< ROMol > ROMOL_SPTR
Std stuff.
Definition: Atom.h:30
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * deleteSubstructs(const ROMol &mol, const ROMol &query, bool onlyFrags=false, bool useChirality=false)
Returns a copy of an ROMol with the atoms and bonds that match a pattern removed. ...
RDKIT_CHEMTRANSFORMS_EXPORT void parseQueryDefFile(const std::string &filename, std::map< std::string, ROMOL_SPTR > &queryDefs, bool standardize=true, const std::string &delimiter="\t", const std::string &comment="//", unsigned int nameColumn=0, unsigned int smartsColumn=1)
parses a query definition file and sets up a set of definitions suitable for use by addRecursiveQueri...
RDKIT_CHEMTRANSFORMS_EXPORT void parseQueryDefText(const std::string &queryDefText, std::map< std::string, ROMOL_SPTR > &queryDefs, bool standardize=true, const std::string &delimiter="\t", const std::string &comment="//", unsigned int nameColumn=0, unsigned int smartsColumn=1)
equivalent to parseQueryDefFile() but the query definitions are explicitly passed in
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * combineMols(const ROMol &mol1, const ROMol &mol2, RDGeom::Point3D offset=RDGeom::Point3D(0, 0, 0))
Combines two molecules to create a new one.
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * replaceSidechains(const ROMol &mol, const ROMol &coreQuery, bool useChirality=false)
Returns a copy of an ROMol with the atoms and bonds that don't fall within a substructure match remov...
#define RDKIT_CHEMTRANSFORMS_EXPORT
Definition: export.h:73