RDKit
Open-source cheminformatics and machine learning.
SmilesParse.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2001-2016 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef _RD_SMILESPARSE_H_
12 #define _RD_SMILESPARSE_H_
13 
14 #include <GraphMol/RWMol.h>
16 #include <string>
17 #include <exception>
18 #include <map>
19 
20 namespace RDKit {
21 
24  bool sanitize;
25  std::map<std::string, std::string> *replacements;
27  bool parseName;
28  bool removeHs;
30  : debugParse(0),
31  sanitize(true),
32  replacements(NULL),
33  allowCXSMILES(true),
34  parseName(false),
35  removeHs(true){};
36 };
//! Construct a molecule from a SMILES string, with the parser options
//! supplied as a SmilesParserParams object
/*!
 \param smi the SMILES to convert
 \param params the parser options; the convenience overload of SmilesToMol
 in this header fills in debugParse, replacements, sanitize and removeHs
 on such an object and then forwards to this function

 \return a pointer to the new molecule. The convenience overload returns
 this pointer unchanged and documents that the caller is responsible for
 freeing it.
 */
37 RDKIT_SMILESPARSE_EXPORT RWMol *SmilesToMol(const std::string &smi,
38  const SmilesParserParams &params);
39 
//! Declarations only: each takes a SMILES string and returns a pointer to a
//! single Atom / Bond. The implementations are not in this header.
// NOTE(review): presumably the caller owns the returned pointer, as with
// SmilesToMol, and the result may be null on a parse failure - confirm
// against the implementation.
40 RDKIT_SMILESPARSE_EXPORT Atom *SmilesToAtom(const std::string &smi);
41 RDKIT_SMILESPARSE_EXPORT Bond *SmilesToBond(const std::string &smi);
42 
43 //! Construct a molecule from a SMILES string
44 /*!
45  \param smi the SMILES to convert
46  \param debugParse toggles verbose debugging information from the parser
47  \param sanitize toggles H removal and sanitization of the molecule
48  \param replacements a string->string map of replacement strings. See below
49  for more information about replacements.
50 
51  \return a pointer to the new molecule; the caller is responsible for free'ing
52  this.
53 
54  The optional replacements map can be used to do string substitution of
55  abbreviations
56  in the input SMILES. The set of substitutions is repeatedly looped through
57  until
58  the string no longer changes. It is the responsibility of the caller to make
59  sure
60  that substitutions result in legal and sensible SMILES.
61 
62  Examples of substitutions:
63  \code
64  CC{Q}C with {"{Q}":"OCCO"} -> CCOCCOC
65  C{A}C{Q}C with {"{Q}":"OCCO", "{A}":"C1(CC1)"} -> CC1(CC1)COCCOC
66  C{A}C{Q}C with {"{Q}":"{X}CC{X}", "{A}":"C1CC1", "{X}":"N"} -> CC1CC1CCNCCNC
67  \endcode
68 
69  */
71  const std::string &smi, int debugParse = 0, bool sanitize = true,
72  std::map<std::string, std::string> *replacements = 0) {
73  SmilesParserParams params;
74  params.debugParse = debugParse;
75  params.replacements = replacements;
76  if (sanitize) {
77  params.sanitize = true;
78  params.removeHs = true;
79  } else {
80  params.sanitize = false;
81  params.removeHs = false;
82  }
83  return SmilesToMol(smi, params);
84 };
85 
86 //! Construct a molecule from a SMARTS string
87 /*!
88  \param sma the SMARTS to convert
89  \param debugParse toggles verbose debugging information from the parser
90  \param mergeHs toggles merging H atoms in the SMARTS into neighboring
91  atoms
92  \param replacements a string->string map of replacement strings.
93  \see SmilesToMol for more information about replacements
94 
95  \return a pointer to the new molecule; the caller is responsible for free'ing
96  this.
97  */
99  const std::string &sma, int debugParse = 0, bool mergeHs = false,
100  std::map<std::string, std::string> *replacements = 0);
101 
//! Declarations only: each takes a SMARTS string and returns a pointer to a
//! single Atom / Bond. The implementations are not in this header.
// NOTE(review): presumably the caller owns the returned pointer, as with
// SmartsToMol, and the result may be null on a parse failure - confirm
// against the implementation.
102 RDKIT_SMILESPARSE_EXPORT Atom *SmartsToAtom(const std::string &sma);
103 RDKIT_SMILESPARSE_EXPORT Bond *SmartsToBond(const std::string &sma);
104 
105 class RDKIT_SMILESPARSE_EXPORT SmilesParseException : public std::exception {
106  public:
107  SmilesParseException(const char *msg) : _msg(msg){};
108  SmilesParseException(const std::string msg) : _msg(msg){};
109  const char *message() const { return _msg.c_str(); };
111 
112  private:
113  std::string _msg;
114 };
115 
116 inline std::unique_ptr<RDKit::RWMol> operator"" _smiles(const char *text,
117  size_t len) {
118  std::string smi(text, len);
119  RWMol *ptr = nullptr;
120  try {
121  ptr = SmilesToMol(smi);
122  } catch (const RDKit::MolSanitizeException &e) {
123  ptr = nullptr;
124  }
125  return std::unique_ptr<RWMol>(ptr);
126 }
127 inline std::unique_ptr<RDKit::RWMol> operator"" _smarts(const char *text,
128  size_t len) {
129  std::string smi(text, len);
130  RWMol *ptr = nullptr;
131  try {
132  ptr = SmartsToMol(smi);
133  } catch (const RDKit::MolSanitizeException &e) {
134  ptr = nullptr;
135  }
136  return std::unique_ptr<RWMol>(ptr);
137 }
138 
139 } // namespace RDKit
140 
141 #endif
RDKIT_SMILESPARSE_EXPORT Bond * SmartsToBond(const std::string &sma)
RDKIT_SMILESPARSE_EXPORT Atom * SmartsToAtom(const std::string &sma)
class for flagging sanitization errors
RDKIT_SMILESPARSE_EXPORT RWMol * SmilesToMol(const std::string &smi, const SmilesParserParams &params)
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
RDKIT_SMILESPARSE_EXPORT RWMol * SmartsToMol(const std::string &sma, int debugParse=0, bool mergeHs=false, std::map< std::string, std::string > *replacements=0)
Construct a molecule from a SMARTS string.
#define RDKIT_SMILESPARSE_EXPORT
Definition: export.h:593
Std stuff.
Definition: Atom.h:30
RDKIT_SMILESPARSE_EXPORT Bond * SmilesToBond(const std::string &smi)
SmilesParseException(const char *msg)
Definition: SmilesParse.h:107
class for representing a bond
Definition: Bond.h:47
const char * message() const
Definition: SmilesParse.h:109
SmilesParseException(const std::string msg)
Definition: SmilesParse.h:108
std::map< std::string, std::string > * replacements
Definition: SmilesParse.h:25
RDKIT_SMILESPARSE_EXPORT Atom * SmilesToAtom(const std::string &smi)
The class for representing atoms.
Definition: Atom.h:69
Defines the editable molecule class RWMol.