RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SmilesWrite.h
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2021 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_SMILESWRITE_H_012020
12#define RD_SMILESWRITE_H_012020
13
14#include <string>
15#include <vector>
16#include <memory>
17#include <cstdint>
18#include <limits>
19
20namespace RDKit {
21class Atom;
22class Bond;
23class ROMol;
24
26 bool doIsomericSmiles =
27 true; /**< include stereochemistry and isotope information */
28 bool doKekule = false; /**< kekulize the molecule before generating the SMILES
29 and output single/double bonds. NOTE that the output
30 is not canonical and that this will throw an
31 exception if the molecule cannot be kekulized. */
32 bool canonical = true; /**< generate canonical SMILES */
33 bool allBondsExplicit = false; /**< include symbols for all bonds */
34 bool allHsExplicit = false; /**< provide hydrogen counts for every atom */
35 bool doRandom = false; /**< randomize the output order. The resulting SMILES
36 is not canonical */
37 int rootedAtAtom = -1; /**< make sure the SMILES starts at the specified
38 atom. The resulting SMILES is not canonical */
39};
40namespace SmilesWrite {
41
57
58//! \brief returns the cxsmiles data for a molecule
60 const ROMol &mol, std::uint32_t flags = CXSmilesFields::CX_ALL);
61
62//! \brief returns true if the atomic number is in the SMILES organic subset
64
65//! \brief returns the SMILES for an atom
66/*!
67 \param atom : the atom to work with
68 \param doKekule : we're doing kekulized smiles (e.g. don't use
69 lower case for the atom label)
70 \param bondIn : the bond we came into the atom on (unused)
71 \param allHsExplicit : if true, hydrogen counts will be provided for every
72 atom.
73 \param isomericSmiles : if true, isomeric SMILES will be generated
74*/
76 bool doKekule = false,
77 const Bond *bondIn = nullptr,
78 bool allHsExplicit = false,
79 bool isomericSmiles = true);
80
81//! \brief returns the SMILES for a bond
82/*!
83 \param bond : the bond to work with
84 \param atomToLeftIdx : the index of the atom preceding \c bond
85 in the SMILES
86 \param doKekule : we're doing kekulized smiles (e.g. write out
87 bond orders for aromatic bonds)
88 \param allBondsExplicit : if true, symbols will be included for all bonds.
89*/
91 const Bond *bond, int atomToLeftIdx = -1, bool doKekule = false,
92 bool allBondsExplicit = false);
93
94namespace detail {
96 const ROMol &mol, const SmilesWriteParams &params, bool doingCXSmiles);
97}
98
99} // namespace SmilesWrite
100
101//! \brief returns canonical SMILES for a molecule
103 const ROMol &mol, const SmilesWriteParams &params);
104
105//! \brief returns canonical SMILES for a molecule
106/*!
107 \param mol : the molecule in question.
108 \param doIsomericSmiles : include stereochemistry and isotope information
109 in the SMILES
110
111 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds) NOTE that
112 this will throw an exception if the molecule cannot be kekulized.
113
114 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
115 The resulting SMILES is not, of course, canonical.
116 \param canonical : if false, no attempt will be made to canonicalize the
117 SMILES
118 \param allBondsExplicit : if true, symbols will be included for all bonds.
119 \param allHsExplicit : if true, hydrogen counts will be provided for every
120 atom.
 \param doRandom : if true, the output order is randomized; the resulting
 SMILES is not canonical.
121 */
122inline std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles = true,
123 bool doKekule = false, int rootedAtAtom = -1,
124 bool canonical = true,
125 bool allBondsExplicit = false,
126 bool allHsExplicit = false,
127 bool doRandom = false) {
  // Convenience overload: each flag argument is copied into the field of the
  // same name on the parameter object `ps`, which is then handed to the
  // MolToSmiles(mol, ps) overload.
  // NOTE(review): the line declaring `ps` (listing line 128) is absent from
  // this rendering -- presumably `SmilesWriteParams ps;`; confirm against the
  // real header.
129 ps.doIsomericSmiles = doIsomericSmiles;
130 ps.doKekule = doKekule;
131 ps.rootedAtAtom = rootedAtAtom;
132 ps.canonical = canonical;
133 ps.allBondsExplicit = allBondsExplicit;
134 ps.allHsExplicit = allHsExplicit;
135 ps.doRandom = doRandom;
136 return MolToSmiles(mol, ps);
  // NOTE(review): the ';' after the closing brace below is an empty
  // declaration; it is not needed after a function body.
137};
138
139//! \brief returns a vector of random SMILES for a molecule (may contain
140//! duplicates)
141/*!
142 \param mol : the molecule in question.
143 \param numSmiles : the number of SMILES to return
144 \param randomSeed : if >0, will be used to seed the random number generator
145 \param doIsomericSmiles : include stereochemistry and isotope information
146 in the SMILES
147 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
148 \param allBondsExplicit : if true, symbols will be included for all bonds.
149 \param allHsExplicit : if true, hydrogen counts will be provided for every
150 atom.
151 */
153 const ROMol &mol, unsigned int numSmiles, unsigned int randomSeed = 0,
154 bool doIsomericSmiles = true, bool doKekule = false,
155 bool allBondsExplicit = false, bool allHsExplicit = false);
156
157//! \brief returns canonical SMILES for part of a molecule
159 const ROMol &mol, const SmilesWriteParams &params,
160 const std::vector<int> &atomsToUse,
161 const std::vector<int> *bondsToUse = nullptr,
162 const std::vector<std::string> *atomSymbols = nullptr,
163 const std::vector<std::string> *bondSymbols = nullptr);
164
165//! \brief returns canonical SMILES for part of a molecule
166/*!
167 \param mol : the molecule in question.
168 \param atomsToUse : indices of the atoms in the fragment
169 \param bondsToUse : indices of the bonds in the fragment. If this is not
170 provided,
171 all bonds between the atoms in atomsToUse will be included
172 \param atomSymbols : symbols to use for the atoms in the output SMILES
173 \param bondSymbols : symbols to use for the bonds in the output SMILES
174 \param doIsomericSmiles : include stereochemistry and isotope information
175 in the SMILES
176 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
177 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
178 The resulting SMILES is not, of course, canonical.
179 \param canonical : if false, no attempt will be made to canonicalize the
180 SMILES
181 \param allBondsExplicit : if true, symbols will be included for all bonds.
182 \param allHsExplicit : if true, hydrogen counts will be provided for every
183 atom.
184 \param doRandom : generate a randomized smiles string by randomly choosing
185 the priority to follow in the DFS traversal. [default false]
 NOTE(review): the signature of this overload shows no doRandom parameter and
 the body never sets ps.doRandom; this entry may be stale -- confirm.
186
187 \b NOTE: the bondSymbols are *not* currently used in the canonicalization.
188
189 */
190inline std::string MolFragmentToSmiles(
191 const ROMol &mol, const std::vector<int> &atomsToUse,
192 const std::vector<int> *bondsToUse = nullptr,
193 const std::vector<std::string> *atomSymbols = nullptr,
194 const std::vector<std::string> *bondSymbols = nullptr,
195 bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1,
196 bool canonical = true, bool allBondsExplicit = false,
197 bool allHsExplicit = false) {
  // Convenience overload: each flag argument is copied into the field of the
  // same name on the parameter object `ps`, which is then passed, together
  // with the atom/bond selections and symbol overrides, to the params-based
  // MolFragmentToSmiles overload.
  // NOTE(review): listing lines 198 (declaration of `ps`, presumably
  // `SmilesWriteParams ps;`) and 206 (the rest of the return statement,
  // presumably `bondSymbols);`) are absent from this rendering; confirm
  // against the real header.
199 ps.doIsomericSmiles = doIsomericSmiles;
200 ps.doKekule = doKekule;
201 ps.rootedAtAtom = rootedAtAtom;
202 ps.canonical = canonical;
203 ps.allBondsExplicit = allBondsExplicit;
204 ps.allHsExplicit = allHsExplicit;
205 return MolFragmentToSmiles(mol, ps, atomsToUse, bondsToUse, atomSymbols,
207}
208
209//! \brief returns canonical CXSMILES for a molecule
211 const ROMol &mol, const SmilesWriteParams &ps,
212 std::uint32_t flags = SmilesWrite::CXSmilesFields::CX_ALL);
213
214//! \brief returns canonical CXSMILES for a molecule
215/*!
216 \param mol : the molecule in question.
217 \param doIsomericSmiles : include stereochemistry and isotope information
218 in the SMILES
219 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
220 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
221 The resulting SMILES is not, of course, canonical.
222 \param canonical : if false, no attempt will be made to canonicalize the
223 SMILES
224 \param allBondsExplicit : if true, symbols will be included for all bonds.
225 \param allHsExplicit : if true, hydrogen counts will be provided for every
226 atom.
 \param doRandom : if true, the output order is randomized; the resulting
 SMILES is not canonical.
227 */
228inline std::string MolToCXSmiles(const ROMol &mol, bool doIsomericSmiles = true,
229 bool doKekule = false, int rootedAtAtom = -1,
230 bool canonical = true,
231 bool allBondsExplicit = false,
232 bool allHsExplicit = false,
233 bool doRandom = false) {
  // Convenience overload: each flag argument is copied into the field of the
  // same name on the parameter object `ps`, which is then handed to the
  // params-based MolToCXSmiles overload. No flags argument is passed, so that
  // overload's default (SmilesWrite::CXSmilesFields::CX_ALL) applies.
  // NOTE(review): the line declaring `ps` (listing line 234) is absent from
  // this rendering -- presumably `SmilesWriteParams ps;`; confirm against the
  // real header.
235 ps.doIsomericSmiles = doIsomericSmiles;
236 ps.doKekule = doKekule;
237 ps.rootedAtAtom = rootedAtAtom;
238 ps.canonical = canonical;
239 ps.allBondsExplicit = allBondsExplicit;
240 ps.allHsExplicit = allHsExplicit;
241 ps.doRandom = doRandom;
242 return MolToCXSmiles(mol, ps);
  // NOTE(review): the ';' after the closing brace below is an empty
  // declaration; it is not needed after a function body.
243};
244
245//! \brief returns canonical CXSMILES for part of a molecule
247 const ROMol &mol, const SmilesWriteParams &params,
248 const std::vector<int> &atomsToUse,
249 const std::vector<int> *bondsToUse = nullptr,
250 const std::vector<std::string> *atomSymbols = nullptr,
251 const std::vector<std::string> *bondSymbols = nullptr);
252
253//! \brief returns canonical CXSMILES for part of a molecule
254/*!
255 \param mol : the molecule in question.
256 \param atomsToUse : indices of the atoms in the fragment
257 \param bondsToUse : indices of the bonds in the fragment. If this is not
258 provided,
259 all bonds between the atoms in atomsToUse will be included
260 \param atomSymbols : symbols to use for the atoms in the output SMILES
261 \param bondSymbols : symbols to use for the bonds in the output SMILES
262 \param doIsomericSmiles : include stereochemistry and isotope information
263 in the SMILES
264 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
265 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
266 The resulting SMILES is not, of course, canonical.
267 \param canonical : if false, no attempt will be made to canonicalize the
268 SMILES
269 \param allBondsExplicit : if true, symbols will be included for all bonds.
270 \param allHsExplicit : if true, hydrogen counts will be provided for every
271 atom.
272
273 \b NOTE: the bondSymbols are *not* currently used in the canonicalization.
274
275 */
276inline std::string MolFragmentToCXSmiles(
277 const ROMol &mol, const std::vector<int> &atomsToUse,
278 const std::vector<int> *bondsToUse = nullptr,
279 const std::vector<std::string> *atomSymbols = nullptr,
280 const std::vector<std::string> *bondSymbols = nullptr,
281 bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1,
282 bool canonical = true, bool allBondsExplicit = false,
283 bool allHsExplicit = false) {
  // Convenience overload: each flag argument is copied into the field of the
  // same name on the parameter object `ps`, which is then passed, together
  // with the atom/bond selections and symbol overrides, to the params-based
  // MolFragmentToCXSmiles overload.
  // NOTE(review): listing lines 284 (declaration of `ps`, presumably
  // `SmilesWriteParams ps;`) and 292 (the rest of the return statement,
  // presumably `bondSymbols);`) are absent from this rendering; confirm
  // against the real header.
285 ps.doIsomericSmiles = doIsomericSmiles;
286 ps.doKekule = doKekule;
287 ps.rootedAtAtom = rootedAtAtom;
288 ps.canonical = canonical;
289 ps.allBondsExplicit = allBondsExplicit;
290 ps.allHsExplicit = allHsExplicit;
291 return MolFragmentToCXSmiles(mol, ps, atomsToUse, bondsToUse, atomSymbols,
293}
294
295} // namespace RDKit
296#endif
The class for representing atoms.
Definition Atom.h:68
class for representing a bond
Definition Bond.h:47
#define RDKIT_SMILESPARSE_EXPORT
Definition export.h:481
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params, bool doingCXSmiles)
RDKIT_SMILESPARSE_EXPORT bool inOrganicSubset(int atomicNumber)
returns true if the atom number is in the SMILES organic subset
RDKIT_SMILESPARSE_EXPORT std::string GetBondSmiles(const Bond *bond, int atomToLeftIdx=-1, bool doKekule=false, bool allBondsExplicit=false)
returns the SMILES for a bond
RDKIT_SMILESPARSE_EXPORT std::string getCXExtensions(const ROMol &mol, std::uint32_t flags=CXSmilesFields::CX_ALL)
returns the cxsmiles data for a molecule
RDKIT_SMILESPARSE_EXPORT std::string GetAtomSmiles(const Atom *atom, bool doKekule=false, const Bond *bondIn=nullptr, bool allHsExplicit=false, bool isomericSmiles=true)
returns the SMILES for an atom
Std stuff.
RDKIT_SMILESPARSE_EXPORT std::vector< std::string > MolToRandomSmilesVect(const ROMol &mol, unsigned int numSmiles, unsigned int randomSeed=0, bool doIsomericSmiles=true, bool doKekule=false, bool allBondsExplicit=false, bool allHsExplicit=false)
returns a vector of random SMILES for a molecule (may contain duplicates)
bool rdvalue_is(const RDValue_cast_t)
RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToSmiles(const ROMol &mol, const SmilesWriteParams &params, const std::vector< int > &atomsToUse, const std::vector< int > *bondsToUse=nullptr, const std::vector< std::string > *atomSymbols=nullptr, const std::vector< std::string > *bondSymbols=nullptr)
returns canonical SMILES for part of a molecule
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params)
returns canonical SMILES for a molecule
RDKIT_SMILESPARSE_EXPORT std::string MolToCXSmiles(const ROMol &mol, const SmilesWriteParams &ps, std::uint32_t flags=SmilesWrite::CXSmilesFields::CX_ALL)
returns canonical CXSMILES for a molecule
RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToCXSmiles(const ROMol &mol, const SmilesWriteParams &params, const std::vector< int > &atomsToUse, const std::vector< int > *bondsToUse=nullptr, const std::vector< std::string > *atomSymbols=nullptr, const std::vector< std::string > *bondSymbols=nullptr)
returns canonical CXSMILES for part of a molecule