RDKit
Open-source cheminformatics and machine learning.
MolWriters.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2017 Greg Landrum, Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef _RD_MOLWRITERS_H_
13 #define _RD_MOLWRITERS_H_
14 
15 #include <RDGeneral/types.h>
16 
17 #include <string>
18 #include <iostream>
19 #include <GraphMol/ROMol.h>
20 
21 namespace RDKit {
22 
23 static int defaultConfId = -1;  // default value for the confId arguments of write()/getText() below; being a namespace-scope static in a header, each including translation unit gets its own copy
25  public:
26  virtual ~MolWriter() {}  // virtual destructor with an empty body
27  virtual void write(const ROMol &mol, int confId = defaultConfId) = 0;  // pure virtual; confId defaults to defaultConfId (the writers in this file document theirs as "write a new molecule to the file")
28  virtual void flush() = 0;  // pure virtual; the writers in this file flush their ostream here
29  virtual void close() = 0;  // pure virtual; the writers in this file close their stream here
30  virtual void setProps(const STR_VECT &propNames) = 0;  // pure virtual; receives the names of the properties to write out for each molecule
31  virtual unsigned int numMols() const = 0;  // pure virtual; the writers in this file return the number of molecules written so far
32 };
33 
34 //! The SmilesWriter is for writing molecules and properties to
35 //! delimited text files.
37  /******************************************************************************
38  * A SMILES table writer - this is how it is used:
39  * - create a SmilesWriter with an output file name (or an ostream), a
40  * delimiter,
41  * and a list of properties that need to be written out
42  * - then call the write function for each molecule that needs
43  * to
44  * be written out
45  ******************************************************************************/
46  public:
47  /*!
48  \param fileName : filename to write to ("-" to write to stdout)
49  \param delimiter : delimiter to use in the text file
50  \param nameHeader : used to label the name column in the output. If this
51  is provided as the empty string, no names will be
52  written.
53  \param includeHeader : toggles inclusion of a header line in the output
54  \param isomericSmiles : toggles generation of isomeric SMILES
55  \param kekuleSmiles : toggles the generation of kekule SMILES
56 
57  */
58  SmilesWriter(const std::string &fileName, const std::string &delimiter = " ",
59  const std::string &nameHeader = "Name",
60  bool includeHeader = true, bool isomericSmiles = true,
61  bool kekuleSmiles = false);
62  //! \overload
63  SmilesWriter(std::ostream *outStream, std::string delimiter = " ",
64  std::string nameHeader = "Name", bool includeHeader = true,
65  bool takeOwnership = false, bool isomericSmiles = true,
66  bool kekuleSmiles = false);
67 
68  ~SmilesWriter();
69 
70  //! \brief set the vector of names of the properties that need to be
71  //! written out for each molecule
72  void setProps(const STR_VECT &propNames);
73 
74  //! \brief write a new molecule to the file
75  void write(const ROMol &mol, int confId = defaultConfId);
76 
77  //! \brief flush the ostream; an exception raised by the flush is caught here and the stream is marked bad instead
78  void flush() {
79  PRECONDITION(dp_ostream, "no output stream");  // requires a stream pointer; close() resets dp_ostream to NULL
80  try {
81  dp_ostream->flush();
82  } catch (...) {
83  try {  // the flush threw: record the failure in the stream state rather than letting it propagate
84  if (dp_ostream->good()) dp_ostream->setstate(std::ios::badbit);
85  } catch (const std::runtime_error &) {  // presumably guards against setstate() itself throwing when the stream has exceptions enabled
86  }
87  }
88  };
89 
90  //! \brief close our stream (the writer cannot be used again); calling close() on an already closed writer is a no-op
91  void close() {
92  if (dp_ostream) flush();  // flush() has a non-null-stream PRECONDITION, so only flush while a stream is attached
93  std::ostream *tmp_ostream = dp_ostream;
94  dp_ostream = NULL;  // detach before deleting so the writer never keeps a dangling pointer
95  if (df_owner) {  // the stream is deleted only when this writer owns it
96  df_owner = false;
97  delete tmp_ostream;
98  }
99  };
100 
101  //! \brief get the number of molecules written so far (the value of the d_molid counter)
102  unsigned int numMols() const { return d_molid; };
103 
104  private:
105  // local initialization
106  void init(const std::string &delimiter, const std::string &nameHeader,
107  bool includeHeader, bool isomericSmiles, bool kekuleSmiles);
108 
109  // dumps a header line to the output stream
110  void dumpHeader() const;
111 
112  std::ostream *dp_ostream;
113  bool df_owner;
114  bool df_includeHeader; // whether or not to include a title line
115  unsigned int d_molid; // the number of the molecules we wrote so far
116  std::string d_delim; // delimiter string between various records
117  std::string d_nameHeader; // header for the name column in the output file
118  STR_VECT d_props; // list of property name that need to be written out
119  bool df_isomericSmiles; // whether or not to do isomeric smiles
120  bool df_kekuleSmiles; // whether or not to do kekule smiles
121 };
122 
123 //! The SDWriter is for writing molecules and properties to
124 //! SD files
126  /**************************************************************************************
127  * An SD file (or stream) writer - this is how it is used:
128  * - create an SDWriter with an output file name (or an ostream),
129  * and a list of properties that need to be written out
130  * - then call the write function for each molecule that needs
131  * to be written out
132  **********************************************************************************************/
133  public:
134  /*!
135  \param fileName : filename to write to ("-" to write to stdout)
136  */
137  SDWriter(const std::string &fileName);
138  SDWriter(std::ostream *outStream, bool takeOwnership = false);
139 
140  ~SDWriter();
141 
142  //! \brief set the vector of names of the properties that need to be
143  //! written out for each molecule
144  void setProps(const STR_VECT &propNames);
145 
146  //! \brief return the text that would be written to the file
147  static std::string getText(const ROMol &mol, int confId = defaultConfId,
148  bool kekulize = true, bool force_V3000 = false,
149  int molid = -1, STR_VECT *propNames = NULL);
150 
151  //! \brief write a new molecule to the file
152  void write(const ROMol &mol, int confId = defaultConfId);
153 
154  //! \brief flush the ostream; an exception raised by the flush is caught here and the stream is marked bad instead
155  void flush() {
156  PRECONDITION(dp_ostream, "no output stream");  // requires a stream pointer; close() resets dp_ostream to NULL
157  try {
158  dp_ostream->flush();
159  } catch (...) {
160  try {  // the flush threw: record the failure in the stream state rather than letting it propagate
161  if (dp_ostream->good()) dp_ostream->setstate(std::ios::badbit);
162  } catch (const std::runtime_error &) {  // presumably guards against setstate() itself throwing when the stream has exceptions enabled
163  }
164  }
165  };
166 
167  //! \brief close our stream (the writer cannot be used again); calling close() on an already closed writer is a no-op
168  void close() {
169  if (dp_ostream) flush();  // flush() has a non-null-stream PRECONDITION, so only flush while a stream is attached
170  std::ostream *tmp_ostream = dp_ostream;
171  dp_ostream = NULL;  // detach before deleting so the writer never keeps a dangling pointer
172  if (df_owner) {  // the stream is deleted only when this writer owns it
173  df_owner = false;
174  delete tmp_ostream;
175  }
176  };
177 
178  //! \brief get the number of molecules written so far (the value of the d_molid counter)
179  unsigned int numMols() const { return d_molid; };
180 
181  void setForceV3000(bool val) { df_forceV3000 = val; };  // set the flag that forces writing the mol blocks as V3000
182  bool getForceV3000() const { return df_forceV3000; };  // current value of the force-V3000 flag
183 
184  void setKekulize(bool val) { df_kekulize = val; };  // toggle kekulization of molecules on writing
185  bool getKekulize() const { return df_kekulize; };  // current value of the kekulization flag
186 
187  private:
188  void writeProperty(const ROMol &mol, const std::string &name);
189 
190  std::ostream *dp_ostream;
191  bool df_owner;
192  unsigned int d_molid; // the number of the molecules we wrote so far
193  STR_VECT d_props; // list of property name that need to be written out
194  bool df_forceV3000; // force writing the mol blocks as V3000
195  bool df_kekulize; // toggle kekulization of molecules on writing
196 };
197 
198 //! The TDTWriter is for writing molecules and properties to
199 //! TDT files
201  /**************************************************************************************
202  * A TDT file (or stream) writer - this is how it is used:
203  * - create a TDTWriter with an output file name (or an ostream),
204  * and a list of properties that need to be written out
205  * - then call the write function for each molecule that needs
206  * to be written out
207  **********************************************************************************************/
208  public:
209  /*!
210  \param fileName : filename to write to ("-" to write to stdout)
211  */
212  TDTWriter(const std::string &fileName);
213  TDTWriter(std::ostream *outStream, bool takeOwnership = false);
214 
215  ~TDTWriter();
216 
217  //! \brief set the vector of names of the properties that need to be
218  //! written out for each molecule
219  void setProps(const STR_VECT &propNames);
220 
221  //! \brief write a new molecule to the file
222  void write(const ROMol &mol, int confId = defaultConfId);
223 
224  //! \brief flush the ostream; an exception raised by the flush is caught here and the stream is marked bad instead
225  void flush() {
226  PRECONDITION(dp_ostream, "no output stream");  // requires a stream pointer; close() resets dp_ostream to NULL
227  try {
228  dp_ostream->flush();
229  } catch (...) {
230  try {  // the flush threw: record the failure in the stream state rather than letting it propagate
231  if (dp_ostream->good()) dp_ostream->setstate(std::ios::badbit);
232  } catch (const std::runtime_error &) {  // presumably guards against setstate() itself throwing when the stream has exceptions enabled
233  }
234  }
235  };
236 
237  //! \brief close our stream (the writer cannot be used again); calling close() on an already closed writer is a no-op
238  void close() {
239  if (dp_ostream) flush();  // flush() has a non-null-stream PRECONDITION, so only flush while a stream is attached
240  std::ostream *tmp_ostream = dp_ostream;
241  dp_ostream = NULL;  // detach before deleting so the writer never keeps a dangling pointer
242  if (df_owner) {  // the stream is deleted only when this writer owns it
243  df_owner = false;
244  delete tmp_ostream;
245  }
246  };
247 
248  //! \brief get the number of molecules written so far (the value of the d_molid counter)
249  unsigned int numMols() const { return d_molid; };
250 
251  void setWrite2D(bool state = true) { df_write2D = state; };  // choose whether 2D coordinates are written instead of 3D; state defaults to true
252  bool getWrite2D() const { return df_write2D; };  // current value of the write-2D flag
253 
254  void setWriteNames(bool state = true) { df_writeNames = state; };  // choose whether a name record is written for each molecule; state defaults to true
255  bool getWriteNames() const { return df_writeNames; };  // current value of the write-names flag
256 
257  void setNumDigits(unsigned int numDigits) { d_numDigits = numDigits; };  // set the number of digits used when coordinates are output
258  unsigned int getNumDigits() const { return d_numDigits; };  // current number of digits used for coordinate output
259 
260  private:
261  void writeProperty(const ROMol &mol, const std::string &name);
262 
263  std::ostream *dp_ostream;
264  bool df_owner;
265  unsigned int d_molid; // the number of molecules we wrote so far
266  STR_VECT d_props; // list of property name that need to be written out
267  bool df_write2D; // write 2D coordinates instead of 3D
268  bool df_writeNames; // write a name record for each molecule
269  unsigned int
270  d_numDigits; // number of digits to use in our output of coordinates;
271 };
272 
273 //! The PDBWriter is for writing molecules to Brookhaven Protein
274 //! DataBank format files.
276  public:
277  PDBWriter(const std::string &fileName, unsigned int flavor = 0);
278  PDBWriter(std::ostream *outStream, bool takeOwnership = false,
279  unsigned int flavor = 0);
280  ~PDBWriter();
281 
282  //! \brief write a new molecule to the file
283  void write(const ROMol &mol, int confId = defaultConfId);
284 
285  void setProps(const STR_VECT &){};  // empty body: the property-name list is ignored by this writer
286 
287  //! \brief flush the ostream; an exception raised by the flush is caught here and the stream is marked bad instead
288  void flush() {
289  PRECONDITION(dp_ostream, "no output stream");  // requires a stream pointer; close() resets dp_ostream to NULL
290  try {
291  dp_ostream->flush();
292  } catch (...) {
293  try {  // the flush threw: record the failure in the stream state rather than letting it propagate
294  if (dp_ostream->good()) dp_ostream->setstate(std::ios::badbit);
295  } catch (const std::runtime_error &) {  // presumably guards against setstate() itself throwing when the stream has exceptions enabled
296  }
297  }
298  };
299 
300  //! \brief close our stream (the writer cannot be used again); calling close() on an already closed writer is a no-op
301  void close() {
302  if (dp_ostream) flush();  // flush() has a non-null-stream PRECONDITION, so only flush while a stream is attached
303  std::ostream *tmp_ostream = dp_ostream;
304  dp_ostream = NULL;  // detach before deleting so the writer never keeps a dangling pointer
305  if (df_owner) {  // the stream is deleted only when this writer owns it
306  df_owner = false;
307  delete tmp_ostream;
308  }
309  };
310 
311  //! \brief get the number of molecules written so far (the value of the d_count counter)
312  unsigned int numMols() const { return d_count; };
313 
314  private:
315  std::ostream *dp_ostream;
316  unsigned int d_flavor;
317  unsigned int d_count;
318  bool df_owner;
319 };
320 }
321 
322 #endif
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:91
void setProps(const STR_VECT &)
Definition: MolWriters.h:285
#define RDKIT_FILEPARSERS_EXPORT
Definition: export.h:203
virtual ~MolWriter()
Definition: MolWriters.h:26
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:312
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:238
void flush()
flush the ostream
Definition: MolWriters.h:288
void flush()
flush the ostream
Definition: MolWriters.h:78
Defines the primary molecule class ROMol as well as associated typedefs.
bool getWrite2D() const
Definition: MolWriters.h:252
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:168
bool getWriteNames() const
Definition: MolWriters.h:255
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:301
void flush()
flush the ostream
Definition: MolWriters.h:155
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:179
bool getKekulize() const
Definition: MolWriters.h:185
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:102
static int defaultConfId
Definition: MolWriters.h:23
unsigned int getNumDigits() const
Definition: MolWriters.h:258
void setNumDigits(unsigned int numDigits)
Definition: MolWriters.h:257
void setWriteNames(bool state=true)
Definition: MolWriters.h:254
bool getForceV3000() const
Definition: MolWriters.h:182
void setForceV3000(bool val)
Definition: MolWriters.h:181
Std stuff.
Definition: Atom.h:30
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:249
#define PRECONDITION(expr, mess)
Definition: Invariant.h:108
void setWrite2D(bool state=true)
Definition: MolWriters.h:251
void flush()
flush the ostream
Definition: MolWriters.h:225
std::vector< std::string > STR_VECT
Definition: Dict.h:29
void setKekulize(bool val)
Definition: MolWriters.h:184