RDKit
Open-source cheminformatics and machine learning.
MolWriters.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2013 Greg Landrum, Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #ifndef _RD_MOLWRITERS_H_
12 #define _RD_MOLWRITERS_H_
13 
14 #include <RDGeneral/types.h>
15 
16 #include <string>
17 #include <iostream>
18 #include <GraphMol/ROMol.h>
19 
20 namespace RDKit {
21 
//! Conformer ID used by the writers' write() methods when the caller does
//! not supply one.
/*!
  Declared \c const on purpose: a namespace-scope \c static in a header has
  internal linkage, so every translation unit that includes this file gets
  its own copy.  If the value were writable, a change made in one source
  file would silently be invisible to all of the others.
*/
static const int defaultConfId = -1;
23  class MolWriter {
24  public:
25  virtual ~MolWriter() {}
26  virtual void write(const ROMol &mol,int confId=defaultConfId) = 0;
27  virtual void flush() = 0;
28  virtual void close() = 0;
29  virtual void setProps(const STR_VECT &propNames)=0;
30  virtual unsigned int numMols() const =0;
31  };
32 
33  //! The SmilesWriter is for writing molecules and properties to
34  //! delimited text files.
35  class SmilesWriter : public MolWriter {
36  /******************************************************************************
37  * A Smiles Table writer - this is how it is used
38  * - create a SmilesWriter with a output file name (or a ostream), a delimiter,
39  * and a list of properties that need to be written out
40  * - then a call is made to the write function for each molecule that needs to
41  * be written out
42  ******************************************************************************/
43  public:
44  /*!
45  \param fileName : filename to write to ("-" to write to stdout)
46  \param delimiter : delimiter to use in the text file
47  \param nameHeader : used to label the name column in the output. If this
48  is provided as the empty string, no names will be written.
49  \param includeHeader : toggles inclusion of a header line in the output
50  \param isomericSmiles : toggles generation of isomeric SMILES
51  \param kekuleSmiles : toggles the generation of kekule SMILES
52 
53  */
54  SmilesWriter(std::string fileName,
55  std::string delimiter=" ",
56  std::string nameHeader="Name",
57  bool includeHeader=true,
58  bool isomericSmiles=false,
59  bool kekuleSmiles=false);
60  //! \overload
61  SmilesWriter(std::ostream *outStream,
62  std::string delimiter=" ",
63  std::string nameHeader="Name",
64  bool includeHeader=true,
65  bool takeOwnership=false,
66  bool isomericSmiles=false,
67  bool kekuleSmiles=false);
68 
69  ~SmilesWriter();
70 
71  //! \brief set a vector of property names that are need to be
72  //! written out for each molecule
73  void setProps(const STR_VECT &propNames);
74 
75  //! \brief write a new molecule to the file
76  void write(const ROMol &mol,int confId=defaultConfId);
77 
78  //! \brief flush the ostream
79  void flush() {
80  PRECONDITION(dp_ostream,"no output stream");
81  dp_ostream->flush();
82  };
83 
84  //! \brief close our stream (the writer cannot be used again)
85  void close() {
86  PRECONDITION(dp_ostream,"no output stream");
87  dp_ostream->flush();
88  if(df_owner) {
89  delete dp_ostream;
90  df_owner=false;
91  }
92  dp_ostream=NULL;
93  };
94 
95  //! \brief get the number of molecules written so far
96  unsigned int numMols() const { return d_molid;} ;
97 
98  private:
99  // local initialization
100  void init(std::string delimiter,std::string nameHeader,
101  bool includeHeader,
102  bool isomericSmiles,
103  bool kekuleSmiles);
104 
105 
106  // dumps a header line to the output stream
107  void dumpHeader() const;
108 
109 
110  std::ostream *dp_ostream;
111  bool df_owner;
112  bool df_includeHeader; // whether or not to include a title line
113  unsigned int d_molid; // the number of the molecules we wrote so far
114  std::string d_delim; // delimiter string between various records
115  std::string d_nameHeader; // header for the name column in the output file
116  STR_VECT d_props; // list of property name that need to be written out
117  bool df_isomericSmiles; // whether or not to do isomeric smiles
118  bool df_kekuleSmiles; // whether or not to do kekule smiles
119  };
120 
121 
122  //! The SDWriter is for writing molecules and properties to
123  //! SD files
124  class SDWriter : public MolWriter {
125  /**************************************************************************************
126  * A SD file ( or stream) writer - this is how it is used
127  * - create a SDMolWriter with a output file name (or a ostream),
128  * and a list of properties that need to be written out
129  * - then a call is made to the write function for each molecule that needs to be written out
130  **********************************************************************************************/
131  public:
132  /*!
133  \param fileName : filename to write to ("-" to write to stdout)
134  */
135  SDWriter(std::string fileName);
136  SDWriter(std::ostream *outStream,bool takeOwnership=false);
137 
138  ~SDWriter();
139 
140  //! \brief set a vector of property names that are need to be
141  //! written out for each molecule
142  void setProps(const STR_VECT &propNames);
143 
144  //! \brief write a new molecule to the file
145  void write(const ROMol &mol, int confId=defaultConfId);
146 
147  //! \brief flush the ostream
148  void flush() {
149  PRECONDITION(dp_ostream,"no output stream");
150  dp_ostream->flush();
151  } ;
152 
153  //! \brief close our stream (the writer cannot be used again)
154  void close() {
155  PRECONDITION(dp_ostream,"no output stream");
156  dp_ostream->flush();
157  if(df_owner) {
158  delete dp_ostream;
159  df_owner=false;
160  }
161  dp_ostream=NULL;
162  };
163 
164  //! \brief get the number of molecules written so far
165  unsigned int numMols() const { return d_molid; };
166 
167  void setForceV3000(bool val) { df_forceV3000=val; };
168  bool getForceV3000() const { return df_forceV3000; };
169 
170  void setKekulize(bool val) { df_kekulize=val; };
171  bool getKekulize() const { return df_kekulize; };
172 
173  private:
174  void writeProperty(const ROMol &mol, std::string name);
175 
176  std::ostream *dp_ostream;
177  bool df_owner;
178  unsigned int d_molid; // the number of the molecules we wrote so far
179  STR_VECT d_props; // list of property name that need to be written out
180  bool df_forceV3000; // force writing the mol blocks as V3000
181  bool df_kekulize; // toggle kekulization of molecules on writing
182  };
183 
184  //! The TDTWriter is for writing molecules and properties to
185  //! TDT files
186  class TDTWriter : public MolWriter {
187  /**************************************************************************************
188  * A TDT file ( or stream) writer - this is how it is used
189  * - create a TDTWriter with a output file name (or a ostream),
190  * and a list of properties that need to be written out
191  * - then a call is made to the write function for each molecule that needs to be written out
192  **********************************************************************************************/
193  public:
194  /*!
195  \param fileName : filename to write to ("-" to write to stdout)
196  */
197  TDTWriter(std::string fileName);
198  TDTWriter(std::ostream *outStream,bool takeOwnership=false);
199 
200  ~TDTWriter();
201 
202  //! \brief set a vector of property names that are need to be
203  //! written out for each molecule
204  void setProps(const STR_VECT &propNames);
205 
206  //! \brief write a new molecule to the file
207  void write(const ROMol &mol, int confId=defaultConfId);
208 
209  //! \brief flush the ostream
210  void flush() {
211  PRECONDITION(dp_ostream,"no output stream");
212  dp_ostream->flush();
213  };
214 
215  //! \brief close our stream (the writer cannot be used again)
216  void close() {
217  PRECONDITION(dp_ostream,"no output stream");
218  dp_ostream->flush();
219  if(df_owner) {
220  delete dp_ostream;
221  df_owner=false;
222  }
223  dp_ostream=NULL;
224  };
225 
226  //! \brief get the number of molecules written so far
227  unsigned int numMols() const { return d_molid; };
228 
229  void setWrite2D(bool state=true) { df_write2D=state; };
230  bool getWrite2D() const { return df_write2D; };
231 
232  void setWriteNames(bool state=true) { df_writeNames=state; };
233  bool getWriteNames() const { return df_writeNames; };
234 
235  void setNumDigits(unsigned int numDigits) { d_numDigits=numDigits; };
236  unsigned int getNumDigits() const { return d_numDigits;};
237 
238  private:
239  void writeProperty(const ROMol &mol, std::string name);
240 
241  std::ostream *dp_ostream;
242  bool df_owner;
243  unsigned int d_molid; // the number of molecules we wrote so far
244  STR_VECT d_props; // list of property name that need to be written out
245  bool df_write2D; // write 2D coordinates instead of 3D
246  bool df_writeNames; // write a name record for each molecule
247  unsigned int d_numDigits; // number of digits to use in our output of coordinates;
248  };
249 
250  //! The PDBWriter is for writing molecules to Brookhaven Protein
251  //! DataBank format files.
252  class PDBWriter : public MolWriter {
253  public:
254  PDBWriter(std::string fileName, unsigned int flavor = 0);
255  PDBWriter(std::ostream *outStream, bool takeOwnership=false,
256  unsigned int flavor = 0);
257  ~PDBWriter();
258 
259  //! \brief write a new molecule to the file
260  void write(const ROMol &mol, int confId=defaultConfId);
261 
262  void setProps(const STR_VECT&) {};
263 
264  //! \brief flush the ostream
265  void flush() {
266  PRECONDITION(dp_ostream,"no output stream");
267  dp_ostream->flush();
268  } ;
269 
270  //! \brief close our stream (the writer cannot be used again)
271  void close() {
272  PRECONDITION(dp_ostream,"no output stream");
273  dp_ostream->flush();
274  if(df_owner) {
275  delete dp_ostream;
276  df_owner=false;
277  }
278  dp_ostream=NULL;
279  };
280 
281  //! \brief get the number of molecules written so far
282  unsigned int numMols() const { return d_count;} ;
283 
284  private:
285  std::ostream *dp_ostream;
286  unsigned int d_flavor;
287  unsigned int d_count;
288  bool df_owner;
289  };
290 
291 
292 }
293 
294 #endif
295 
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:85
void setProps(const STR_VECT &)
Definition: MolWriters.h:262
bool getWriteNames() const
Definition: MolWriters.h:233
virtual ~MolWriter()
Definition: MolWriters.h:25
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:216
virtual unsigned int numMols() const =0
void flush()
flush the ostream
Definition: MolWriters.h:265
virtual void close()=0
void flush()
flush the ostream
Definition: MolWriters.h:79
Defines the primary molecule class ROMol as well as associated typedefs.
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:154
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:271
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:105
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:96
bool getForceV3000() const
Definition: MolWriters.h:168
void flush()
flush the ostream
Definition: MolWriters.h:148
bool getWrite2D() const
Definition: MolWriters.h:230
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:227
virtual void flush()=0
static int defaultConfId
Definition: MolWriters.h:22
void setNumDigits(unsigned int numDigits)
Definition: MolWriters.h:235
void setWriteNames(bool state=true)
Definition: MolWriters.h:232
virtual void setProps(const STR_VECT &propNames)=0
void setForceV3000(bool val)
Definition: MolWriters.h:167
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
bool getKekulize() const
Definition: MolWriters.h:171
unsigned int getNumDigits() const
Definition: MolWriters.h:236
virtual void write(const ROMol &mol, int confId=defaultConfId)=0
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:165
#define PRECONDITION(expr, mess)
Definition: Invariant.h:119
void setWrite2D(bool state=true)
Definition: MolWriters.h:229
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:282
void flush()
flush the ostream
Definition: MolWriters.h:210
std::vector< std::string > STR_VECT
Definition: Dict.h:26
void setKekulize(bool val)
Definition: MolWriters.h:170