RDKit
Open-source cheminformatics and machine learning.
FileParsers.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2013 Greg Landrum, Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef _RD_FILEPARSERS_H
11 #define _RD_FILEPARSERS_H
12 
13 #include <RDGeneral/types.h>
14 #include <GraphMol/RDKitBase.h>
15 
16 #include <string>
17 #include <iostream>
18 #include <vector>
19 #include <exception>
20 
21 #include <boost/shared_ptr.hpp>
22 
23 namespace RDKit{
24  const int MOLFILE_MAXLINE=256;
25  std::string strip(const std::string &orig);
26 
27  //-----
28  // mol files
29  //-----
30  typedef std::vector< RWMOL_SPTR > RWMOL_SPTR_VECT;
31  //! \brief construct a molecule from MDL mol data in a stream
32  /*!
33  * \param inStream - stream containing the data
34  * \param line - current line number (used for error reporting)
35  * \param sanitize - toggles sanitization and stereochemistry
36  * perception of the molecule
37  * \param removeHs - toggles removal of Hs from the molecule. H removal
38  * is only done if the molecule is sanitized
40  * \param strictParsing - if not set, the parser is more lax about correctness
41  * of the contents.
42  *
43  */
44  RWMol *MolDataStreamToMol(std::istream *inStream, unsigned int &line,
45  bool sanitize=true,bool removeHs=true,
46  bool strictParsing=true);
47  //! \overload
48  RWMol *MolDataStreamToMol(std::istream &inStream, unsigned int &line,
49  bool sanitize=true,bool removeHs=true,
50  bool strictParsing=true);
51  //! \brief construct a molecule from an MDL mol block
52  /*!
53  * \param molBlock - string containing the mol block
54  * \param sanitize - toggles sanitization and stereochemistry
55  * perception of the molecule
56  * \param removeHs - toggles removal of Hs from the molecule. H removal
57  * is only done if the molecule is sanitized
58  * \param strictParsing - if not set, the parser is more lax about correctness
59  * of the contents.
60  */
61  RWMol *MolBlockToMol(const std::string &molBlock, bool sanitize=true,
62  bool removeHs=true,bool strictParsing=true);
63 
64  //! \brief construct a molecule from an MDL mol file
65  /*!
66  * \param fName - string containing the file name
67  * \param sanitize - toggles sanitization and stereochemistry
68  * perception of the molecule
69  * \param removeHs - toggles removal of Hs from the molecule. H removal
70  * is only done if the molecule is sanitized
71  * \param strictParsing - if not set, the parser is more lax about correctness
72  * of the contents.
73  */
74  RWMol *MolFileToMol(std::string fName, bool sanitize=true,
75  bool removeHs=true,bool strictParsing=true);
76 
77  //! \brief generates an MDL mol block for a molecule
78  /*!
79  * \param mol - the molecule in question
80  * \param includeStereo - toggles inclusion of stereochemistry information
81  * \param confId - selects the conformer to be used
82  * \param kekulize - triggers kekulization of the molecule before it is written
83  * \param forceV3000 - force generation of a V3000 mol block (happens automatically with
84  * more than 999 atoms or bonds)
85  */
86  std::string MolToMolBlock(const ROMol &mol,bool includeStereo=true,
87  int confId=-1,bool kekulize=true,bool forceV3000=false);
88  //! \brief Writes a molecule to an MDL mol file
89  /*!
90  * \param mol - the molecule in question
91  * \param fName - the name of the file to use
92  * \param includeStereo - toggles inclusion of stereochemistry information
93  * \param confId - selects the conformer to be used
94  * \param kekulize - triggers kekulization of the molecule before it is written
95  * \param forceV3000 - force generation of a V3000 mol block (happens automatically with
96  * more than 999 atoms or bonds)
97  */
98  void MolToMolFile(const ROMol &mol,std::string fName,bool includeStereo=true,
99  int confId=-1,bool kekulize=true,bool forceV3000=false);
100 
101 
102  //-----
103  // TPL handling:
104  //-----
105 
106  //! \brief translate TPL data (BioCad format) into a multi-conf molecule
107  /*!
108  \param inStream: the stream from which to read
109  \param line: used to track the line number of errors
110  \param sanitize: toggles sanitization and stereochemistry
111  perception of the molecule
112  \param skipFirstConf: according to the TPL format description, the atomic
113  coords in the atom-information block describe the first
114  conformation and the first conf block describes the second
115  conformation. The CombiCode, on the other hand, writes
116  the first conformation data both to the atom-information
117  block and to the first conf block. We want to be able to
118  read CombiCode-style tpls, so we'll allow this mis-feature
119  to be parsed when this flag is set.
120  */
121  RWMol *TPLDataStreamToMol(std::istream *inStream, unsigned int &line,
122  bool sanitize=true,
123  bool skipFirstConf=false);
124 
125  //! \brief construct a multi-conf molecule from a TPL (BioCad format) file
126  /*!
127  \param fName: the name of the file from which to read
128  \param sanitize: toggles sanitization and stereochemistry
129  perception of the molecule
130  \param skipFirstConf: according to the TPL format description, the atomic
131  coords in the atom-information block describe the first
132  conformation and the first conf block describes the second
133  conformation. The CombiCode, on the other hand, writes
134  the first conformation data both to the atom-information
135  block and to the first conf block. We want to be able to
136  read CombiCode-style tpls, so we'll allow this mis-feature
137  to be parsed when this flag is set.
138  */
139  RWMol *TPLFileToMol(std::string fName,bool sanitize=true,
140  bool skipFirstConf=false);
141 
142  std::string MolToTPLText(const ROMol &mol,
143  std::string partialChargeProp="_GasteigerCharge",
144  bool writeFirstConfTwice=false);
145  void MolToTPLFile(const ROMol &mol,std::string fName,
146  std::string partialChargeProp="_GasteigerCharge",
147  bool writeFirstConfTwice=false);
148 
149  //-----
150  // MOL2 handling
151  //-----
152 
153  typedef enum {
154  CORINA=0 //!< supports output from Corina and some dbtranslate output
155  } Mol2Type;
156 
157  //! \brief construct a molecule from a Tripos mol2 file
158  /*!
159  *
160  * \param fName - string containing the file name
161  * \param sanitize - toggles sanitization of the molecule
162  * \param removeHs - toggles removal of Hs from the molecule. H removal
163  * is only done if the molecule is sanitized
164  * \param variant - the atom type definitions to use
165  */
166  RWMol *Mol2FileToMol(std::string fName,bool sanitize=true,bool removeHs=true,
167  Mol2Type variant=CORINA);
168 
169  //! \brief construct a molecule from Tripos mol2 data in a stream
170  /*!
171  * \param inStream - stream containing the data
172  * \param sanitize - toggles sanitization of the molecule
173  * \param removeHs - toggles removal of Hs from the molecule. H removal
174  * is only done if the molecule is sanitized
175  * \param variant - the atom type definitions to use
176  */
177  RWMol *Mol2DataStreamToMol(std::istream *inStream,bool sanitize=true,bool removeHs=true,
178  Mol2Type variant=CORINA);
179  //! \overload
180  RWMol *Mol2DataStreamToMol(std::istream &inStream,bool sanitize=true,bool removeHs=true,
181  Mol2Type variant=CORINA);
182 
183  //! \brief construct a molecule from a Tripos mol2 block
184  /*!
185  * \param molBlock - string containing the mol block
186  * \param sanitize - toggles sanitization of the molecule
187  * \param removeHs - toggles removal of Hs from the molecule. H removal
188  * is only done if the molecule is sanitized
189  * \param variant - the atom type definitions to use
190  */
191  RWMol *Mol2BlockToMol(const std::string &molBlock,bool sanitize=true,bool removeHs=true,
192  Mol2Type variant=CORINA);
193 
194  RWMol *PDBBlockToMol(const char *str, bool sanitize=true,
195  bool removeHs=true, unsigned int flavor=0);
196 
197  RWMol *PDBBlockToMol(const std::string &str, bool sanitize=true,
198  bool removeHs=true, unsigned int flavor=0);
199  RWMol *PDBDataStreamToMol(std::istream *inStream, bool sanitize=true,
200  bool removeHs=true, unsigned int flavor=0);
201  RWMol *PDBDataStreamToMol(std::istream &inStream, bool sanitize=true,
202  bool removeHs=true, unsigned int flavor=0);
203  RWMol *PDBFileToMol(const std::string &fname, bool sanitize=true,
204  bool removeHs=true, unsigned int flavor=0);
205 
206  //! \brief generates a PDB block for a molecule
207  /*!
208  * \param mol - the molecule in question
209  * \param confId - selects the conformer to be used
210  * \param flavor - controls what gets written:
211  * flavor & 1 : Write MODEL/ENDMDL lines around each record
212  * flavor & 2 : Don't write any CONECT records
213  * flavor & 4 : Write CONECT records in both directions
214  * flavor & 8 : Don't use multiple CONECTs to encode bond order
215  * flavor & 16 : Write MASTER record
216  * flavor & 32 : Write TER record
217  */
218  std::string MolToPDBBlock(const ROMol &mol, int confId=-1, unsigned int flavor=0);
219  //! \brief Writes a molecule to a PDB file
220  /*!
221  * \param mol - the molecule in question
222  * \param fName - the name of the file to use
223  * \param confId - selects the conformer to be used
224  * \param flavor - controls what gets written:
225  * flavor & 1 : Write MODEL/ENDMDL lines around each record
226  * flavor & 2 : Don't write any CONECT records
227  * flavor & 4 : Write CONECT records in both directions
228  * flavor & 8 : Don't use multiple CONECTs to encode bond order
229  * flavor & 16 : Write MASTER record
230  * flavor & 32 : Write TER record
231  */
232  void MolToPDBFile(const ROMol &mol,const std::string &fname, int confId=-1, unsigned int flavor=0);
233 }
234 
235 #endif
std::vector< RWMOL_SPTR > RWMOL_SPTR_VECT
Definition: FileParsers.h:30
RWMol * PDBDataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, unsigned int flavor=0)
RWMol * PDBBlockToMol(const char *str, bool sanitize=true, bool removeHs=true, unsigned int flavor=0)
ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RWMol * Mol2DataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA)
void MolToMolFile(const ROMol &mol, std::string fName, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
const int MOLFILE_MAXLINE
Definition: FileParsers.h:24
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:30
RWMol * MolDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RWMol * TPLFileToMol(std::string fName, bool sanitize=true, bool skipFirstConf=false)
construct a multi-conf molecule from a TPL (BioCad format) file
std::string MolToMolBlock(const ROMol &mol, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
pulls in the core RDKit functionality
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:105
RWMol * Mol2FileToMol(std::string fName, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA)
void MolToPDBFile(const ROMol &mol, const std::string &fname, int confId=-1, unsigned int flavor=0)
std::string strip(const std::string &orig)
RWMol * Mol2BlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA)
std::string MolToPDBBlock(const ROMol &mol, int confId=-1, unsigned int flavor=0)
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
RWMol * TPLDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool skipFirstConf=false)
translate TPL data (BioCad format) into a multi-conf molecule
RWMol * PDBFileToMol(const std::string &fname, bool sanitize=true, bool removeHs=true, unsigned int flavor=0)
RWMol * MolBlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RWMol * MolFileToMol(std::string fName, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
std::string MolToTPLText(const ROMol &mol, std::string partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
void MolToTPLFile(const ROMol &mol, std::string fName, std::string partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)