RDKit
Open-source cheminformatics and machine learning.
FileParsers.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2013 Greg Landrum, Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef _RD_FILEPARSERS_H
12 #define _RD_FILEPARSERS_H
13 
14 #include <RDGeneral/types.h>
15 #include <GraphMol/RDKitBase.h>
16 
17 #include <string>
18 #include <iostream>
19 #include <vector>
20 #include <exception>
21 
22 #include <boost/shared_ptr.hpp>
23 
24 namespace RDKit {
25 const int MOLFILE_MAXLINE = 256;
26 RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig);
27 
28 //-----
29 // mol files
30 //-----
31 typedef std::vector<RWMOL_SPTR> RWMOL_SPTR_VECT;
32 //! \brief construct a molecule from MDL mol data in a stream
33 /*!
34  * \param inStream - stream containing the data
35  * \param line - current line number (used for error reporting)
36  * \param sanitize - toggles sanitization and stereochemistry
37  * perception of the molecule
38  * \param removeHs - toggles removal of Hs from the molecule. H removal
39  * is only done if the molecule is sanitized
41  * \param strictParsing - if not set, the parser is more lax about correctness
42  * of the contents.
43  *
44  */
45 RDKIT_FILEPARSERS_EXPORT RWMol *MolDataStreamToMol(std::istream *inStream,
46  unsigned int &line,
47  bool sanitize = true,
48  bool removeHs = true,
49  bool strictParsing = true);
50 //! \overload
51 RDKIT_FILEPARSERS_EXPORT RWMol *MolDataStreamToMol(std::istream &inStream,
52  unsigned int &line,
53  bool sanitize = true,
54  bool removeHs = true,
55  bool strictParsing = true);
56 //! \brief construct a molecule from an MDL mol block
57 /*!
58  * \param molBlock - string containing the mol block
59  * \param sanitize - toggles sanitization and stereochemistry
60  * perception of the molecule
61  * \param removeHs - toggles removal of Hs from the molecule. H removal
62  * is only done if the molecule is sanitized
63  * \param strictParsing - if not set, the parser is more lax about correctness
64  * of the contents.
65  */
66 RDKIT_FILEPARSERS_EXPORT RWMol *MolBlockToMol(const std::string &molBlock,
67  bool sanitize = true,
68  bool removeHs = true,
69  bool strictParsing = true);
70 
71 //! \brief construct a molecule from an MDL mol file
72 /*!
73  * \param fName - string containing the file name
74  * \param sanitize - toggles sanitization and stereochemistry
75  * perception of the molecule
76  * \param removeHs - toggles removal of Hs from the molecule. H removal
77  * is only done if the molecule is sanitized
78  * \param strictParsing - if not set, the parser is more lax about correctness
79  * of the contents.
80  */
81 RDKIT_FILEPARSERS_EXPORT RWMol *MolFileToMol(const std::string &fName,
82  bool sanitize = true,
83  bool removeHs = true,
84  bool strictParsing = true);
85 
86 //! \brief generates an MDL mol block for a molecule
87 /*!
88  * \param mol - the molecule in question
89  * \param includeStereo - toggles inclusion of stereochemistry information
90  * \param confId - selects the conformer to be used
91  * \param kekulize - triggers kekulization of the molecule before it is
92  * written
93  * \param forceV3000 - force generation of a V3000 mol block (happens
94  * automatically with
95  * more than 999 atoms or bonds)
96  */
97 RDKIT_FILEPARSERS_EXPORT std::string MolToMolBlock(const ROMol &mol,
98  bool includeStereo = true,
99  int confId = -1,
100  bool kekulize = true,
101  bool forceV3000 = false);
102 //! \brief Writes a molecule to an MDL mol file
103 /*!
104  * \param mol - the molecule in question
105  * \param fName - the name of the file to use
106  * \param includeStereo - toggles inclusion of stereochemistry information
107  * \param confId - selects the conformer to be used
108  * \param kekulize - triggers kekulization of the molecule before it is
109  * written
110  * \param forceV3000 - force generation of a V3000 mol block (happens
111  * automatically with
112  * more than 999 atoms or bonds)
113  */
115  const ROMol &mol, const std::string &fName, bool includeStereo = true,
116  int confId = -1, bool kekulize = true, bool forceV3000 = false);
117 
118 //-----
119 // TPL handling:
120 //-----
121 
122 //! \brief translate TPL data (BioCad format) into a multi-conf molecule
123 /*!
124  \param inStream: the stream from which to read
125  \param line: used to track the line number of errors
126  \param sanitize: toggles sanitization and stereochemistry
127  perception of the molecule
128  \param skipFirstConf: according to the TPL format description, the atomic
129  coords in the atom-information block describe the first
130  conformation and the first conf block describes the second
131  conformation. The CombiCode, on the other hand, writes
132  the first conformation data both to the atom-information
133  block and to the first conf block. We want to be able to
134  read CombiCode-style tpls, so we'll allow this
135  mis-feature
136  to be parsed when this flag is set.
137 */
138 RDKIT_FILEPARSERS_EXPORT RWMol *TPLDataStreamToMol(std::istream *inStream,
139  unsigned int &line,
140  bool sanitize = true,
141  bool skipFirstConf = false);
142 
143 //! \brief construct a multi-conf molecule from a TPL (BioCad format) file
144 /*!
145  \param fName: the name of the file from which to read
146  \param sanitize: toggles sanitization and stereochemistry
147  perception of the molecule
148  \param skipFirstConf: according to the TPL format description, the atomic
149  coords in the atom-information block describe the first
150  conformation and the first conf block describes the second
151  conformation. The CombiCode, on the other hand, writes
152  the first conformation data both to the atom-information
153  block and to the first conf block. We want to be able to
154  read CombiCode-style tpls, so we'll allow this
155  mis-feature
156  to be parsed when this flag is set.
157 */
158 RDKIT_FILEPARSERS_EXPORT RWMol *TPLFileToMol(const std::string &fName,
159  bool sanitize = true,
160  bool skipFirstConf = false);
161 
163  const ROMol &mol, const std::string &partialChargeProp = "_GasteigerCharge",
164  bool writeFirstConfTwice = false);
166  const ROMol &mol, const std::string &fName,
167  const std::string &partialChargeProp = "_GasteigerCharge",
168  bool writeFirstConfTwice = false);
169 
170 //-----
171 // MOL2 handling
172 //-----
173 
174 typedef enum {
175  CORINA = 0 //!< supports output from Corina and some dbtranslate output
176 } Mol2Type;
177 
178 //! \brief construct a molecule from a Tripos mol2 file
179 /*!
180  *
181  * \param fName - string containing the file name
182  * \param sanitize - toggles sanitization of the molecule
183  * \param removeHs - toggles removal of Hs from the molecule. H removal
184  * is only done if the molecule is sanitized
185  * \param variant - the atom type definitions to use
186  * \param cleanupSubstructures - toggles recognition and cleanup of common
187  * substructures
188  */
189 RDKIT_FILEPARSERS_EXPORT RWMol *Mol2FileToMol(const std::string &fName,
190  bool sanitize = true,
191  bool removeHs = true,
192  Mol2Type variant = CORINA,
193  bool cleanupSubstructures = true);
194 
195 //! \brief construct a molecule from Tripos mol2 data in a stream
196 /*!
197  * \param inStream - stream containing the data
198  * \param sanitize - toggles sanitization of the molecule
199  * \param removeHs - toggles removal of Hs from the molecule. H removal
200  * is only done if the molecule is sanitized
201  * \param variant - the atom type definitions to use
202  * \param cleanupSubstructures - toggles recognition and cleanup of common
203  * substructures
204  */
206  std::istream *inStream, bool sanitize = true, bool removeHs = true,
207  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
208 //! \overload
210  std::istream &inStream, bool sanitize = true, bool removeHs = true,
211  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
212 
213 //! \brief construct a molecule from a Tripos mol2 block
214 /*!
215  * \param molBlock - string containing the mol block
216  * \param sanitize - toggles sanitization of the molecule
217  * \param removeHs - toggles removal of Hs from the molecule. H removal
218  * is only done if the molecule is sanitized
219  * \param variant - the atom type definitions to use
220  * \param cleanupSubstructures - toggles recognition and cleanup of common
221  * substructures
222  */
224  const std::string &molBlock, bool sanitize = true, bool removeHs = true,
225  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
226 
228  bool sanitize = true,
229  bool removeHs = true,
230  unsigned int flavor = 0,
231  bool proximityBonding = true);
232 
233 RDKIT_FILEPARSERS_EXPORT RWMol *PDBBlockToMol(const std::string &str,
234  bool sanitize = true,
235  bool removeHs = true,
236  unsigned int flavor = 0,
237  bool proximityBonding = true);
239  std::istream *inStream, bool sanitize = true, bool removeHs = true,
240  unsigned int flavor = 0, bool proximityBonding = true);
242  std::istream &inStream, bool sanitize = true, bool removeHs = true,
243  unsigned int flavor = 0, bool proximityBonding = true);
244 RDKIT_FILEPARSERS_EXPORT RWMol *PDBFileToMol(const std::string &fname,
245  bool sanitize = true,
246  bool removeHs = true,
247  unsigned int flavor = 0,
248  bool proximityBonding = true);
249 
250 //! \brief generates a PDB block for a molecule
251 /*!
252  * \param mol - the molecule in question
253  * \param confId - selects the conformer to be used
254  * \param flavor - controls what gets written:
255  * flavor & 1 : Write MODEL/ENDMDL lines around each record
256  * flavor & 2 : Don't write any CONECT records
257  * flavor & 4 : Write CONECT records in both directions
258  * flavor & 8 : Don't use multiple CONECTs to encode bond order
259  * flavor & 16 : Write MASTER record
260  * flavor & 32 : Write TER record
261  */
262 RDKIT_FILEPARSERS_EXPORT std::string MolToPDBBlock(const ROMol &mol,
263  int confId = -1,
264  unsigned int flavor = 0);
265 //! \brief Writes a molecule to a PDB file
266 /*!
267  * \param mol - the molecule in question
268  * \param fName - the name of the file to use
269  * \param confId - selects the conformer to be used
270  * \param flavor - controls what gets written:
271  * flavor & 1 : Write MODEL/ENDMDL lines around each record
272  * flavor & 2 : Don't write any CONECT records
273  * flavor & 4 : Write CONECT records in both directions
274  * flavor & 8 : Don't use multiple CONECTs to encode bond order
275  * flavor & 16 : Write MASTER record
276  * flavor & 32 : Write TER record
277  */
279  const std::string &fname,
280  int confId = -1,
281  unsigned int flavor = 0);
282 
283 //! \brief reads a molecule from the metadata in an RDKit-generated SVG file
284 /*!
285  * \param svg - string containing the SVG
286  * \param sanitize - toggles sanitization of the molecule
287  * \param removeHs - toggles removal of Hs from the molecule. H removal
288  * is only done if the molecule is sanitized
289  *
290  * **NOTE** This functionality should be considered beta.
291  */
292 RDKIT_FILEPARSERS_EXPORT RWMol *RDKitSVGToMol(const std::string &svg,
293  bool sanitize = true,
294  bool removeHs = true);
295 /*! \overload
296  */
297 RDKIT_FILEPARSERS_EXPORT RWMol *RDKitSVGToMol(std::istream *instream,
298  bool sanitize = true,
299  bool removeHs = true);
300 
301 } // namespace RDKit
302 
303 #endif
RDKIT_FILEPARSERS_EXPORT RWMol * TPLFileToMol(const std::string &fName, bool sanitize=true, bool skipFirstConf=false)
construct a multi-conf molecule from a TPL (BioCad format) file
#define RDKIT_FILEPARSERS_EXPORT
Definition: export.h:203
std::vector< RWMOL_SPTR > RWMOL_SPTR_VECT
Definition: FileParsers.h:31
RDKIT_FILEPARSERS_EXPORT RWMol * MolBlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
const int MOLFILE_MAXLINE
Definition: FileParsers.h:25
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
RDKIT_FILEPARSERS_EXPORT RWMol * PDBDataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_FILEPARSERS_EXPORT void MolToPDBFile(const ROMol &mol, const std::string &fname, int confId=-1, unsigned int flavor=0)
RDKIT_FILEPARSERS_EXPORT std::string MolToPDBBlock(const ROMol &mol, int confId=-1, unsigned int flavor=0)
RDKIT_FILEPARSERS_EXPORT RWMol * RDKitSVGToMol(const std::string &svg, bool sanitize=true, bool removeHs=true)
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2DataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
pulls in the core RDKit functionality
RDKIT_FILEPARSERS_EXPORT RWMol * PDBFileToMol(const std::string &fname, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RDKIT_FILEPARSERS_EXPORT RWMol * TPLDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool skipFirstConf=false)
translate TPL data (BioCad format) into a multi-conf molecule
RDKIT_FILEPARSERS_EXPORT RWMol * MolFileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RDKIT_FILEPARSERS_EXPORT std::string MolToTPLText(const ROMol &mol, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
RDKIT_FILEPARSERS_EXPORT void MolToTPLFile(const ROMol &mol, const std::string &fName, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
RDKIT_FILEPARSERS_EXPORT RWMol * PDBBlockToMol(const char *str, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
Std stuff.
Definition: Atom.h:30
RDKIT_FILEPARSERS_EXPORT RWMol * MolDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2FileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2BlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKIT_FILEPARSERS_EXPORT void MolToMolFile(const ROMol &mol, const std::string &fName, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig)
RDKIT_FILEPARSERS_EXPORT std::string MolToMolBlock(const ROMol &mol, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)