RDKit
Open-source cheminformatics and machine learning.
ReactionParser.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2007-2014, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior written
18 // permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 
33 #include <RDGeneral/export.h>
34 #ifndef RD_REACTIONPARSER_H_21Aug2006
35 #define RD_REACTIONPARSER_H_21Aug2006
36 
37 #include <string>
38 #include <iostream>
39 #include <fstream>
40 #include <sstream>
41 #include <utility>
42 #include <boost/format.hpp>
45 
46 namespace RDKit {
47 class ChemicalReaction;
48 
49 //! used to indicate an error in parsing reaction data
51  : public std::exception {
52  public:
53  //! construct with an error message
54  explicit ChemicalReactionParserException(const char *msg) : _msg(msg) {}
55  //! construct with an error message
56  explicit ChemicalReactionParserException(std::string msg)
57  : _msg(std::move(msg)) {}
58  //! get the error message
59  const char *what() const noexcept override { return _msg.c_str(); }
60  ~ChemicalReactionParserException() noexcept override = default;
61 
62  private:
63  std::string _msg;
64 };
65 
66 //---------------------------------------------------------------------------
67 //! \name Reaction SMARTS/SMILES Support
68 //@{
69 
70 //! Parse a string containing "Reaction SMARTS" into a ChemicalReaction
71 /*!
72  Our definition of Reaction SMARTS is something that looks a lot like
73  reaction SMILES, except that SMARTS queries are allowed on the reactant
74  side and that atom-map numbers are required (at least for now)
75 
76  \param text the SMARTS to convert
77  \param replacements a string->string map of replacement strings.
78  \see SmilesToMol for more information about replacements
79  \param useSmiles if set, the SMILES parser will be used instead of the
80  SMARTS
81  parser for the individual components
82  */
84  const std::string &text,
85  std::map<std::string, std::string> *replacements = nullptr,
86  bool useSmiles = false);
87 
88 //! returns the reaction SMARTS for a reaction
90  const ChemicalReaction &rxn);
91 
92 //! returns the reaction SMILES for a reaction
94  const ChemicalReaction &rxn, bool canonical = true);
95 //@}
96 
97 //---------------------------------------------------------------------------
98 //! \name Reaction Mol Support
99 //@{
100 
101 //! Parse a ROMol into a ChemicalReaction, RXN role must be set before
102 /*!
103  Alternative way to build a reaction: from a molecule whose fragments have RXN
104  roles
105  set as atom properties: common_properties::molRxnRole (1=reactant, 2=product,
106  3=agent)
107 
108  \param mol ROMol with RXN roles set
109  */
111  const ROMol &mol);
112 
113 //! returns a ROMol with RXN roles used to describe the reaction
115  const ChemicalReaction &rxn);
116 //@}
117 
118 //---------------------------------------------------------------------------
119 //! \name MDL rxn Support
120 //@{
121 
122 //! Parse a text block in MDL rxn format into a ChemicalReaction
124  const std::string &rxnBlock, bool sanitize = false, bool removeHs = false,
125  bool strictParsing = true);
126 //! Parse a file in MDL rxn format into a ChemicalReaction
128  const std::string &fileName, bool sanitize = false, bool removeHs = false,
129  bool strictParsing = true);
130 //! Parse a text stream in MDL rxn format into a ChemicalReaction
132  std::istream &rxnStream, unsigned int &line, bool sanitize = false,
133  bool removeHs = false, bool strictParsing = true);
134 //! returns an rxn block for a reaction
135 /*!
136  \param rxn chemical reaction
137  \param separateAgents if set, agents are put in a separate block;
138  otherwise they are included in the reactants block
139  (default)
140  */
142  const ChemicalReaction &rxn, bool separateAgents = false);
143 
144 //@}
145 
146 //---------------------------------------------------------------------------
147 //! \name PNG Support
148 //@{
149 
150 //! Tags used for PNG metadata; only declared here (\c extern), the string values are defined elsewhere
151 namespace PNGData {
152 RDKIT_CHEMREACTIONS_EXPORT extern const std::string rxnSmilesTag;  // going by the name: tag for reaction SMILES - confirm at the definition
153 RDKIT_CHEMREACTIONS_EXPORT extern const std::string rxnSmartsTag;  // going by the name: tag for reaction SMARTS - confirm at the definition
154 RDKIT_CHEMREACTIONS_EXPORT extern const std::string rxnRxnTag;  // going by the name: tag for an RXN block - confirm at the definition
155 RDKIT_CHEMREACTIONS_EXPORT extern const std::string rxnPklTag;  // going by the name: tag for a reaction pickle - confirm at the definition
156 } // namespace PNGData
157 
158 //! \brief constructs a ChemicalReaction from the metadata in a PNG stream
159 /*!
160 
161 Looks through the metadata in the PNG to find the first tag that matches one of
162 the tags in \c RDKit::PNGData. A reaction is constructed from this chunk.
163 
164 Throws a \c FileParseException if no suitable tag is found.
165 
166 The caller is responsible for the returned pointer.
167 
168  */
170  std::istream &pngStream);
171 //! \brief constructs a ChemicalReaction from the metadata in a PNG string
172 //! \param data the PNG data as a string. The result of \c PNGStreamToChemicalReaction() is returned unchanged; see it for more details
173 inline ChemicalReaction *PNGStringToChemicalReaction(const std::string &data) {
174  std::stringstream inStream(data);  // wrap the string in a stream so the stream-based reader can be reused
175  return PNGStreamToChemicalReaction(inStream);
176 };
177 //! \brief constructs a ChemicalReaction from the metadata in a PNG file
178 //! Throws a \c BadFileException if \c fname cannot be opened. See \c PNGStreamToChemicalReaction() for more details
179 inline ChemicalReaction *PNGFileToChemicalReaction(const std::string &fname) {
180  std::ifstream inStream(fname.c_str(), std::ios::binary);  // opened in binary mode
181  if (!inStream || (inStream.bad())) {  // the stream is unusable: report the file name instead of handing it to the reader
182  throw BadFileException((boost::format("Bad input file %s") % fname).str());
183  }
184  return PNGStreamToChemicalReaction(inStream);  // result is passed through unchanged
185 };
186 
187 //! \brief adds metadata for a ChemicalReaction to the data from a PNG stream.
188 //! The modified PNG data is returned.
189 /*!
190 
191  \param rxn the reaction to add
192  \param iStream the stream to read from
193  \param includePkl include a reaction pickle
194  \param includeSmiles include reaction SMILES for the reaction
195  \param includeSmarts include reaction SMARTS for the reaction
196  \param includeRxn include an RXN block for the reaction
197 
198 */
200  const ChemicalReaction &rxn, std::istream &iStream, bool includePkl = true,
201  bool includeSmiles = true, bool includeSmarts = false,
202  bool includeRxn = false);
203 //! \brief adds metadata for a ChemicalReaction to the data from a PNG string.
204 //! See addChemicalReactionToPNGStream() for more details.
205 inline std::string addChemicalReactionToPNGString(const ChemicalReaction &rxn,
206  const std::string &pngString,
207  bool includePkl = true,
208  bool includeSmiles = true,
209  bool includeSmarts = false,
210  bool includeRxn = false) {
211  std::stringstream inStream(pngString);
213  rxn, inStream, includePkl, includeSmiles, includeSmarts, includeRxn);
214 }
215 //! \brief adds metadata for a ChemicalReaction to the data from a PNG file.
216 //! See addChemicalReactionToPNGStream() for more details.
217 inline std::string addChemicalReactionToPNGFile(const ChemicalReaction &rxn,
218  const std::string &fname,
219  bool includePkl = true,
220  bool includeSmiles = true,
221  bool includeSmarts = false,
222  bool includeRxn = false) {
223  std::ifstream inStream(fname.c_str(), std::ios::binary);
225  rxn, inStream, includePkl, includeSmiles, includeSmarts, includeRxn);
226 }
227 //@}
228 
229 inline std::unique_ptr<ChemicalReaction> operator"" _rxnsmarts(const char *text,
230  size_t len) {  // string-literal suffix _rxnsmarts: builds a reaction from the literal via RxnSmartsToChemicalReaction()
231  std::string sma(text, len);  // copy exactly len characters of the literal
232  ChemicalReaction *ptr = nullptr;
233  try {
234  ptr = RxnSmartsToChemicalReaction(sma);
235  } catch (...) {  // every exception is swallowed here: a failed parse yields a null result instead of propagating
236  ptr = nullptr;
237  }
238  return std::unique_ptr<ChemicalReaction>(ptr);  // the unique_ptr takes ownership of the raw pointer (may be null)
239 }
240 inline std::unique_ptr<ChemicalReaction> operator"" _rxnsmiles(const char *text,
241  size_t len) {  // string-literal suffix _rxnsmiles: builds a reaction from the literal with useSmiles set
242  std::string sma(text, len);  // copy exactly len characters of the literal
243  ChemicalReaction *ptr = nullptr;
244  try {
245  ptr = RxnSmartsToChemicalReaction(sma, nullptr, true);  // no replacements map; third argument is useSmiles = true
246  } catch (...) {  // every exception is swallowed here: a failed parse yields a null result instead of propagating
247  ptr = nullptr;
248  }
249  return std::unique_ptr<ChemicalReaction>(ptr);  // the unique_ptr takes ownership of the raw pointer (may be null)
250 }
251 
252 }; // namespace RDKit
253 
254 #endif
used by various file parsing classes to indicate a bad file
used to indicate an error in parsing reaction data
const char * what() const noexcept override
get the error message
ChemicalReactionParserException(std::string msg)
construct with an error message
~ChemicalReactionParserException() noexcept override=default
ChemicalReactionParserException(const char *msg)
construct with an error message
This is a class for storing and applying general chemical reactions.
Definition: Reaction.h:121
#define RDKIT_CHEMREACTIONS_EXPORT
Definition: export.h:49
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_CHEMREACTIONS_EXPORT const std::string rxnRxnTag
RDKIT_CHEMREACTIONS_EXPORT const std::string rxnSmilesTag
RDKIT_CHEMREACTIONS_EXPORT const std::string rxnSmartsTag
RDKIT_CHEMREACTIONS_EXPORT const std::string rxnPklTag
Std stuff.
Definition: Abbreviations.h:18
RDKIT_CHEMREACTIONS_EXPORT std::string ChemicalReactionToRxnSmiles(const ChemicalReaction &rxn, bool canonical=true)
returns the reaction SMILES for a reaction
ChemicalReaction * PNGFileToChemicalReaction(const std::string &fname)
constructs a ChemicalReaction from the metadata in a PNG file See PNGStreamToChemicalReaction() for m...
RDKIT_CHEMREACTIONS_EXPORT ChemicalReaction * PNGStreamToChemicalReaction(std::istream &pngStream)
constructs a ChemicalReaction from the metadata in a PNG stream
RDKIT_CHEMREACTIONS_EXPORT ChemicalReaction * RxnBlockToChemicalReaction(const std::string &rxnBlock, bool sanitize=false, bool removeHs=false, bool strictParsing=true)
Parse a text block in MDL rxn format into a ChemicalReaction.
RDKIT_CHEMREACTIONS_EXPORT std::string addChemicalReactionToPNGStream(const ChemicalReaction &rxn, std::istream &iStream, bool includePkl=true, bool includeSmiles=true, bool includeSmarts=false, bool includeRxn=false)
adds metadata for a ChemicalReaction to the data from a PNG stream. The modified PNG data is returned...
RDKIT_CHEMREACTIONS_EXPORT ROMol * ChemicalReactionToRxnMol(const ChemicalReaction &rxn)
returns a ROMol with RXN roles used to describe the reaction
ChemicalReaction * PNGStringToChemicalReaction(const std::string &data)
constructs a ChemicalReaction from the metadata in a PNG string See PNGStreamToChemicalReaction() for...
RDKIT_CHEMREACTIONS_EXPORT ChemicalReaction * RxnDataStreamToChemicalReaction(std::istream &rxnStream, unsigned int &line, bool sanitize=false, bool removeHs=false, bool strictParsing=true)
Parse a text stream in MDL rxn format into a ChemicalReaction.
RDKIT_CHEMREACTIONS_EXPORT std::string ChemicalReactionToRxnSmarts(const ChemicalReaction &rxn)
returns the reaction SMARTS for a reaction
std::string addChemicalReactionToPNGFile(const ChemicalReaction &rxn, const std::string &fname, bool includePkl=true, bool includeSmiles=true, bool includeSmarts=false, bool includeRxn=false)
adds metadata for a ChemicalReaction to the data from a PNG string. See addChemicalReactionToPNGStrea...
RDKIT_CHEMREACTIONS_EXPORT ChemicalReaction * RxnFileToChemicalReaction(const std::string &fileName, bool sanitize=false, bool removeHs=false, bool strictParsing=true)
Parse a file in MDL rxn format into a ChemicalReaction.
RDKIT_CHEMREACTIONS_EXPORT std::string ChemicalReactionToRxnBlock(const ChemicalReaction &rxn, bool separateAgents=false)
returns an rxn block for a reaction
RDKIT_CHEMREACTIONS_EXPORT ChemicalReaction * RxnMolToChemicalReaction(const ROMol &mol)
Parse a ROMol into a ChemicalReaction, RXN role must be set before.
RDKIT_CHEMREACTIONS_EXPORT ChemicalReaction * RxnSmartsToChemicalReaction(const std::string &text, std::map< std::string, std::string > *replacements=nullptr, bool useSmiles=false)
Parse a string containing "Reaction SMARTS" into a ChemicalReaction.
std::string addChemicalReactionToPNGString(const ChemicalReaction &rxn, const std::string &pngString, bool includePkl=true, bool includeSmiles=true, bool includeSmarts=false, bool includeRxn=false)
adds metadata for a ChemicalReaction to the data from a PNG string. See addChemicalReactionToPNGStrea...