RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
FileParsers.h
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2022 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_FILEPARSERS_H
12#define RD_FILEPARSERS_H
13
14#include <RDGeneral/types.h>
15#include <GraphMol/RDKitBase.h>
16#include "CDXMLParser.h"
17#include <string>
18#include <string_view>
19#include <iostream>
20#include <vector>
21#include <exception>
#include <memory>
#include <utility>
22
23#include <boost/shared_ptr.hpp>
24
25namespace RDKit {
//! line-length constant for mol-file handling; presumably the longest line
//! the parsers expect (NOTE(review): confirm against the parser sources).
//! Declared `inline constexpr` so that every translation unit including this
//! header refers to one and the same object.
inline constexpr int MOLFILE_MAXLINE = 256;
27RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig);
28
30 : public std::exception {
31 public:
32 //! construct with an error message
33 explicit MolFileUnhandledFeatureException(const char *msg) : _msg(msg) {}
34 //! construct with an error message
35 explicit MolFileUnhandledFeatureException(const std::string msg)
36 : _msg(msg) {}
37 //! get the error message
38 const char *what() const noexcept override { return _msg.c_str(); }
//! compiler-generated destructor; declared noexcept and marked as an override
//! of the base-class destructor
39 ~MolFileUnhandledFeatureException() noexcept override = default;
40
41 private:
42 std::string _msg;
43};
44
45//-----
46// mol files
47//-----
48typedef std::vector<RWMOL_SPTR> RWMOL_SPTR_VECT;
49// \brief construct a molecule from MDL mol data in a stream
50/*!
51 * \param inStream - stream containing the data
52 * \param line - current line number (used for error reporting)
53 * \param sanitize - toggles sanitization and stereochemistry
54 * perception of the molecule
55 * \param removeHs - toggles removal of Hs from the molecule. H removal
56 * is only done if the molecule is sanitized
58 * \param strictParsing - if set to false, the parser is more lax about
59 * correctness of the contents.
60 *
61 */
62RDKIT_FILEPARSERS_EXPORT RWMol *MolDataStreamToMol(std::istream *inStream,
63 unsigned int &line,
64 bool sanitize = true,
65 bool removeHs = true,
66 bool strictParsing = true);
67// \overload
68RDKIT_FILEPARSERS_EXPORT RWMol *MolDataStreamToMol(std::istream &inStream,
69 unsigned int &line,
70 bool sanitize = true,
71 bool removeHs = true,
72 bool strictParsing = true);
73// \brief construct a molecule from an MDL mol block
74/*!
75 * \param molBlock - string containing the mol block
76 * \param sanitize - toggles sanitization and stereochemistry
77 * perception of the molecule
78 * \param removeHs - toggles removal of Hs from the molecule. H removal
79 * is only done if the molecule is sanitized
80 * \param strictParsing - if set to false, the parser is more lax about
81 * correctness of the contents.
82 */
83RDKIT_FILEPARSERS_EXPORT RWMol *MolBlockToMol(const std::string &molBlock,
84 bool sanitize = true,
85 bool removeHs = true,
86 bool strictParsing = true);
87
88// \brief construct a molecule from an MDL mol file
89/*!
90 * \param fName - string containing the file name
91 * \param sanitize - toggles sanitization and stereochemistry
92 * perception of the molecule
93 * \param removeHs - toggles removal of Hs from the molecule. H removal
94 * is only done if the molecule is sanitized
95 * \param strictParsing - if set to false, the parser is more lax about
96 * correctness of the contents.
97 */
98RDKIT_FILEPARSERS_EXPORT RWMol *MolFileToMol(const std::string &fName,
99 bool sanitize = true,
100 bool removeHs = true,
101 bool strictParsing = true);
102
103// \brief generates an MDL mol block for a molecule
104/*!
105 * \param mol - the molecule in question
106 * \param includeStereo - toggles inclusion of stereochemistry information
107 * \param confId - selects the conformer to be used
108 * \param kekulize - triggers kekulization of the molecule before it is
109 * written
110 * \param forceV3000 - force generation of a V3000 mol block (happens
111 * automatically with
112 * more than 999 atoms or bonds)
113 */
114RDKIT_FILEPARSERS_EXPORT std::string MolToMolBlock(const ROMol &mol,
115 bool includeStereo = true,
116 int confId = -1,
117 bool kekulize = true,
118 bool forceV3000 = false);
119
120// \brief generates an MDL v3000 mol block for a molecule
121/*!
122 * \param mol - the molecule in question
123 * \param includeStereo - toggles inclusion of stereochemistry information
124 * \param confId - selects the conformer to be used
125 * \param kekulize - triggers kekulization of the molecule before it is
126 * written
127 */
128inline std::string MolToV3KMolBlock(const ROMol &mol, bool includeStereo = true,
129 int confId = -1, bool kekulize = true) {
130 return MolToMolBlock(mol, includeStereo, confId, kekulize, true);
131}
132
133// \brief Writes a molecule to an MDL mol file
134/*!
135 * \param mol - the molecule in question
136 * \param fName - the name of the file to use
137 * \param includeStereo - toggles inclusion of stereochemistry information
138 * \param confId - selects the conformer to be used
139 * \param kekulize - triggers kekulization of the molecule before it is
140 * written
141 * \param forceV3000 - force generation of a V3000 mol block (happens
142 * automatically with
143 * more than 999 atoms or bonds)
144 */
146 const ROMol &mol, const std::string &fName, bool includeStereo = true,
147 int confId = -1, bool kekulize = true, bool forceV3000 = false);
148
149// \brief Writes a molecule to an MDL V3000 mol file
150/*!
151 * \param mol - the molecule in question
152 * \param fName - the name of the file to use
153 * \param includeStereo - toggles inclusion of stereochemistry information
154 * \param confId - selects the conformer to be used
155 * \param kekulize - triggers kekulization of the molecule before it is
156 * written
157 */
158inline void MolToV3KMolFile(const ROMol &mol, const std::string &fName,
159 bool includeStereo = true, int confId = -1,
160 bool kekulize = true) {
161 MolToMolFile(mol, fName, includeStereo, confId, kekulize, true);
162}
163
165 int confId = -1,
166 bool kekulize = true);
167
169 const std::string &fName,
170 int confId = -1,
171 bool kekulize = true);
172
174 int confId = -1);
175
177 const std::string &fName,
178 int confId = -1);
179
180//-----
181// TPL handling:
182//-----
183
184//! \brief translate TPL data (BioCad format) into a multi-conf molecule
185/*!
186 \param inStream: the stream from which to read
187 \param line: used to track the line number of errors
188 \param sanitize: toggles sanitization and stereochemistry
189 perception of the molecule
190 \param skipFirstConf: according to the TPL format description, the atomic
191 coords in the atom-information block describe the first
192 conformation and the first conf block describes the second
193 conformation. The CombiCode, on the other hand, writes
194 the first conformation data both to the atom-information
195 block and to the first conf block. We want to be able to
196 read CombiCode-style tpls, so we'll allow this
197 mis-feature
198 to be parsed when this flag is set.
199*/
201 unsigned int &line,
202 bool sanitize = true,
203 bool skipFirstConf = false);
204
205//! \brief construct a multi-conf molecule from a TPL (BioCad format) file
206/*!
207 \param fName: the name of the file from which to read
208 \param sanitize: toggles sanitization and stereochemistry
209 perception of the molecule
210 \param skipFirstConf: according to the TPL format description, the atomic
211 coords in the atom-information block describe the first
212 conformation and the first conf block describes the second
213 conformation. The CombiCode, on the other hand, writes
214 the first conformation data both to the atom-information
215 block and to the first conf block. We want to be able to
216 read CombiCode-style tpls, so we'll allow this
217 mis-feature
218 to be parsed when this flag is set.
219*/
221 bool sanitize = true,
222 bool skipFirstConf = false);
223
225 const ROMol &mol, const std::string &partialChargeProp = "_GasteigerCharge",
226 bool writeFirstConfTwice = false);
228 const ROMol &mol, const std::string &fName,
229 const std::string &partialChargeProp = "_GasteigerCharge",
230 bool writeFirstConfTwice = false);
231
232//-----
233// MOL2 handling
234//-----
235
//! selects which set of mol2 atom type definitions a parser should use
enum Mol2Type {
  CORINA = 0  //!< supports output from Corina and some dbtranslate output
};
239
240// \brief construct a molecule from a Tripos mol2 file
241/*!
242 *
243 * \param fName - string containing the file name
244 * \param sanitize - toggles sanitization of the molecule
245 * \param removeHs - toggles removal of Hs from the molecule. H removal
246 * is only done if the molecule is sanitized
247 * \param variant - the atom type definitions to use
248 * \param cleanupSubstructures - toggles recognition and cleanup of common
249 * substructures
250 */
252 bool sanitize = true,
253 bool removeHs = true,
254 Mol2Type variant = CORINA,
255 bool cleanupSubstructures = true);
256
257// \brief construct a molecule from Tripos mol2 data in a stream
258/*!
259 * \param inStream - stream containing the data
260 * \param sanitize - toggles sanitization of the molecule
261 * \param removeHs - toggles removal of Hs from the molecule. H removal
262 * is only done if the molecule is sanitized
263 * \param variant - the atom type definitions to use
264 * \param cleanupSubstructures - toggles recognition and cleanup of common
265 * substructures
266 */
268 std::istream *inStream, bool sanitize = true, bool removeHs = true,
269 Mol2Type variant = CORINA, bool cleanupSubstructures = true);
270// \overload
272 std::istream &inStream, bool sanitize = true, bool removeHs = true,
273 Mol2Type variant = CORINA, bool cleanupSubstructures = true);
274
275// \brief construct a molecule from a Tripos mol2 block
276/*!
277 * \param molBlock - string containing the mol block
278 * \param sanitize - toggles sanitization of the molecule
279 * \param removeHs - toggles removal of Hs from the molecule. H removal
280 * is only done if the molecule is sanitized
281 * \param variant - the atom type definitions to use
282 * \param cleanupSubstructures - toggles recognition and cleanup of common
283 * substructures
284 */
286 const std::string &molBlock, bool sanitize = true, bool removeHs = true,
287 Mol2Type variant = CORINA, bool cleanupSubstructures = true);
288
290// \brief construct a molecule from an xyz block
291/*!
292 * \param xyzBlock - string containing the xyz block
293 */
294RDKIT_FILEPARSERS_EXPORT RWMol *XYZBlockToMol(const std::string &xyzBlock);
295// \brief construct a molecule from an xyz file
296/*!
297 * \param fName - string containing the file name
298 */
299RDKIT_FILEPARSERS_EXPORT RWMol *XYZFileToMol(const std::string &fName);
300
302 bool sanitize = true,
303 bool removeHs = true,
304 unsigned int flavor = 0,
305 bool proximityBonding = true);
306
308 bool sanitize = true,
309 bool removeHs = true,
310 unsigned int flavor = 0,
311 bool proximityBonding = true);
313 std::istream *inStream, bool sanitize = true, bool removeHs = true,
314 unsigned int flavor = 0, bool proximityBonding = true);
316 std::istream &inStream, bool sanitize = true, bool removeHs = true,
317 unsigned int flavor = 0, bool proximityBonding = true);
319 bool sanitize = true,
320 bool removeHs = true,
321 unsigned int flavor = 0,
322 bool proximityBonding = true);
323
324// \brief generates a PDB block for a molecule
325/*!
326 * \param mol - the molecule in question
327 * \param confId - selects the conformer to be used
328 * \param flavor - controls what gets written:
329 * flavor & 1 : Write MODEL/ENDMDL lines around each record
330 * flavor & 2 : Don't write single CONECT records
331 * flavor & 4 : Write CONECT records in both directions
332 * flavor & 8 : Don't use multiple CONECTs to encode bond order
333 * flavor & 16 : Write MASTER record
334 * flavor & 32 : Write TER record
335 */
337 int confId = -1,
338 unsigned int flavor = 0);
339// \brief Writes a molecule to a PDB file
340/*!
341 * \param mol - the molecule in question
342 * \param fName - the name of the file to use
343 * \param confId - selects the conformer to be used
344 * \param flavor - controls what gets written:
345 * flavor & 1 : Write MODEL/ENDMDL lines around each record
346 * flavor & 2 : Don't write single CONECT records
347 * flavor & 4 : Write CONECT records in both directions
348 * flavor & 8 : Don't use multiple CONECTs to encode bond order
349 * flavor & 16 : Write MASTER record
350 * flavor & 32 : Write TER record
351 */
353 const std::string &fname,
354 int confId = -1,
355 unsigned int flavor = 0);
356
357// \brief reads a molecule from the metadata in an RDKit-generated SVG file
358/*!
359 * \param svg - string containing the SVG
360 * \param sanitize - toggles sanitization of the molecule
361 * \param removeHs - toggles removal of Hs from the molecule. H removal
362 * is only done if the molecule is sanitized
363 *
364 * **NOTE** This functionality should be considered beta.
365 */
367 bool sanitize = true,
368 bool removeHs = true);
369/*! \overload
370 */
372 bool sanitize = true,
373 bool removeHs = true);
374
375inline std::unique_ptr<RDKit::RWMol> operator"" _ctab(const char *text,
376 size_t len) {
377 std::string data(text, len);
378 RWMol *ptr = nullptr;
379 try {
380 ptr = MolBlockToMol(data);
381 } catch (const RDKit::MolSanitizeException &) {
382 ptr = nullptr;
383 }
384 return std::unique_ptr<RWMol>(ptr);
385}
386inline std::unique_ptr<RDKit::RWMol> operator"" _mol2(const char *text,
387 size_t len) {
388 std::string data(text, len);
389 RWMol *ptr = nullptr;
390 try {
391 ptr = Mol2BlockToMol(data);
392 } catch (const RDKit::MolSanitizeException &) {
393 ptr = nullptr;
394 }
395 return std::unique_ptr<RWMol>(ptr);
396}
397
398inline std::unique_ptr<RDKit::RWMol> operator"" _pdb(const char *text,
399 size_t len) {
400 std::string data(text, len);
401 RWMol *ptr = nullptr;
402 try {
403 ptr = PDBBlockToMol(data);
404 } catch (const RDKit::MolSanitizeException &) {
405 ptr = nullptr;
406 }
407 return std::unique_ptr<RWMol>(ptr);
408}
409
410} // namespace RDKit
411
412#endif
pulls in the core RDKit functionality
MolFileUnhandledFeatureException(const char *msg)
construct with an error message
Definition FileParsers.h:33
MolFileUnhandledFeatureException(const std::string msg)
construct with an error message
Definition FileParsers.h:35
~MolFileUnhandledFeatureException() noexcept override=default
const char * what() const noexcept override
get the error message
Definition FileParsers.h:38
class for flagging sanitization errors
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
#define RDKIT_FILEPARSERS_EXPORT
Definition export.h:161
Std stuff.
RDKIT_FILEPARSERS_EXPORT RWMol * PDBFileToMol(const std::string &fname, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig)
RDKIT_FILEPARSERS_EXPORT void MolToMolFile(const ROMol &mol, const std::string &fName, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
RDKIT_FILEPARSERS_EXPORT std::string MolToPDBBlock(const ROMol &mol, int confId=-1, unsigned int flavor=0)
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2FileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
bool rdvalue_is(const RDValue_cast_t)
RDKIT_FILEPARSERS_EXPORT std::string MolToXYZBlock(const ROMol &mol, int confId=-1)
RDKIT_FILEPARSERS_EXPORT RWMol * PDBBlockToMol(const char *str, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RDKIT_FILEPARSERS_EXPORT void MolToXYZFile(const ROMol &mol, const std::string &fName, int confId=-1)
RDKIT_FILEPARSERS_EXPORT RWMol * MolBlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RDKIT_FILEPARSERS_EXPORT std::string MolToTPLText(const ROMol &mol, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
RDKIT_FILEPARSERS_EXPORT void MolToPDBFile(const ROMol &mol, const std::string &fname, int confId=-1, unsigned int flavor=0)
RDKIT_FILEPARSERS_EXPORT void MolToCMLFile(const ROMol &mol, const std::string &fName, int confId=-1, bool kekulize=true)
RDKIT_FILEPARSERS_EXPORT RWMol * RDKitSVGToMol(const std::string &svg, bool sanitize=true, bool removeHs=true)
RDKIT_FILEPARSERS_EXPORT RWMol * TPLDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool skipFirstConf=false)
translate TPL data (BioCad format) into a multi-conf molecule
void MolToV3KMolFile(const ROMol &mol, const std::string &fName, bool includeStereo=true, int confId=-1, bool kekulize=true)
RDKIT_FILEPARSERS_EXPORT RWMol * XYZBlockToMol(const std::string &xyzBlock)
RDKIT_FILEPARSERS_EXPORT RWMol * XYZDataStreamToMol(std::istream &inStream)
RDKIT_FILEPARSERS_EXPORT std::string MolToMolBlock(const ROMol &mol, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2DataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKIT_FILEPARSERS_EXPORT RWMol * XYZFileToMol(const std::string &fName)
RDKIT_FILEPARSERS_EXPORT std::string MolToCMLBlock(const ROMol &mol, int confId=-1, bool kekulize=true)
@ CORINA
supports output from Corina and some dbtranslate output
RDKIT_FILEPARSERS_EXPORT void MolToTPLFile(const ROMol &mol, const std::string &fName, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2BlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKIT_FILEPARSERS_EXPORT RWMol * TPLFileToMol(const std::string &fName, bool sanitize=true, bool skipFirstConf=false)
construct a multi-conf molecule from a TPL (BioCad format) file
const int MOLFILE_MAXLINE
Definition FileParsers.h:26
std::vector< RWMOL_SPTR > RWMOL_SPTR_VECT
Definition FileParsers.h:48
RDKIT_FILEPARSERS_EXPORT RWMol * PDBDataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
boost::shared_ptr< RWMol > RWMOL_SPTR
Definition RWMol.h:216