RDKit
Open-source cheminformatics and machine learning.
StructChecker.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2016 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 /*! \file StructChecker.h
12 
13 \brief Contains the public API of the StructChecker
14 
15 \b Note that this should be considered beta and that the API may change in
16 future
17 releases.
18 
19 */
20 #include <RDGeneral/export.h>
21 #pragma once
22 #ifndef RD_STRUCTCHECKER_H_Oct2016
23 #define RD_STRUCTCHECKER_H_Oct2016
24 
25 #include <string>
26 #include <vector>
27 #include "../RDKitBase.h"
28 
29 namespace RDKit {
30 namespace StructureCheck {
31 
32 // Flags for the return values of the StructureChecker
33 
34 // TypeDefs for translating augmented atom pairs
35 static const int ANY_CHARGE = 8;
37  RT_NONE = 0,
38  SINGLET = 1,
39  DOUBLET = 2,
40  TRIPLET = 3,
41  ANY_RADICAL = 0xFF
42 };
43 
44 enum AABondType { // MDL CTFile bond types plus extensions
45  BT_NONE = 0, // means REMOVE Bond
46  SINGLE = 1,
47  DOUBLE = 2,
48  TRIPLE = 3,
49  AROMATIC = 4,
53  ANY_BOND = 8,
55 };
56 
// Topology constraint for an augmented atom: AugmentedAtom's explicit
// constructor takes one of these values as its `topology` argument.
57 enum AATopology {
58  TP_NONE = 0, // no constraint (don't care)
59  RING = 1, // Ring
60  CHAIN = 2 // Chain
61 };
62 
64  std::string AtomSymbol;
65  int Charge;
67  unsigned SubstitutionCount; // substitution count 0 = don't care
70  : Charge(ANY_CHARGE),
71  Radical(ANY_RADICAL),
72  SubstitutionCount(0),
73  BondType(ANY_BOND) {}
74 };
75 
77  std::string AtomSymbol;
78  std::string ShortName;
79  int Charge;
82  std::vector<Ligand> Ligands;
83 
85  : Charge(ANY_CHARGE), Radical(ANY_RADICAL), Topology(TP_NONE) {}
86 
// Constructs an augmented atom from an explicit symbol, short name, charge,
// radical type and topology. Ligands is not in the initializer list, so the
// vector is default-constructed (empty).
87  AugmentedAtom(const std::string &symbol, const std::string &name, int charge,
88  RadicalType radical, AATopology topology)
89  : AtomSymbol(symbol),
90  ShortName(name),
91  Charge(charge),
92  Radical(radical),
93  Topology(topology) {}
94 };
95 
97  std::string AtomSymbol;
98  double LocalInc;
99  double AlphaInc;
100  double BetaInc;
101  double MultInc;
102 
103  // Used for logging
108 };
109 
112  double Cond;
113  // Used for logging
115 };
116 //-------------
117 
118 //! Structure Check Options
119 // Holds all the user options for the StructureChecking.
120 // Can be initialized from factory functions, perhaps serialized
122  double AcidityLimit;
127  unsigned MaxMolSize;
134  bool Verbose;
135 
136  // Internal data for struchk
137  std::vector<std::pair<AugmentedAtom, AugmentedAtom> > AugmentedAtomPairs;
138  std::vector<AugmentedAtom> AcidicAtoms;
139  std::vector<AugmentedAtom> GoodAtoms;
140  std::vector<ROMOL_SPTR> Patterns;
141  std::vector<ROMOL_SPTR> RotatePatterns;
142  std::vector<ROMOL_SPTR> StereoPatterns;
143  std::vector<ROMOL_SPTR> FromTautomer;
144  std::vector<ROMOL_SPTR> ToTautomer;
145 
146  double Elneg0; // elneg_table[0].value;
147  std::map<unsigned, double> ElnegTable; // AtomicNumber -> elneg
148  std::vector<IncEntry> AtomAcidity; // atom_acidity_table[]
149  std::vector<IncEntry> ChargeIncTable;
150  // std::map AtomSymbol(or AtomicNumber) -> IncEntry
151  /* [ReadTransformation() ]
152  * The alpha, beta coefficients of the transformation function used
153  * to stretch the preliminary pKa values to the actual predictions.
154  * The function is pKa = 7 + (pKa'-7)*beta + ((pKa'-7)*alpha)^3.
155  */
156 
157  double Alpha, Beta;
158  std::vector<PathEntry> AlphaPathTable, BetaPathTable;
159 
160  public:
162 
// Resets all options by assigning a freshly default-constructed
// StructCheckerOptions to this object.
163  void clear() { *this = StructCheckerOptions(); }
164 
165  bool loadAugmentedAtomTranslations(const std::string &path);
166  void setAugmentedAtomTranslations(
167  const std::vector<std::pair<AugmentedAtom, AugmentedAtom> > &aaPairs);
168 
169  bool loadAcidicAugmentedAtoms(const std::string &path);
170  void setAcidicAugmentedAtoms(const std::vector<AugmentedAtom> &acidicAtoms);
171 
172  bool loadGoodAugmentedAtoms(const std::string &path);
173  void setGoodAugmentedAtoms(const std::vector<AugmentedAtom> &acidicAtoms);
174 
175  bool loadPatterns(const std::string &path); // file with clean patterns
176  void parsePatterns(
177  const std::vector<std::string> &smarts); // can throw RDKit exceptions
178  void setPatterns(const std::vector<ROMOL_SPTR> &p);
179 
180  bool loadRotatePatterns(
181  const std::string &path); // file with rotate patterns
182  void parseRotatePatterns(
183  const std::vector<std::string> &smarts); // can throw RDKit exceptions
184  void setRotatePatterns(const std::vector<ROMOL_SPTR> &p);
185 
186  bool loadStereoPatterns(
187  const std::string &path); // file with stereo patterns
188  void parseStereoPatterns(
189  const std::vector<std::string> &smarts); // can throw RDKit exceptions
190  void setStereoPatterns(const std::vector<ROMOL_SPTR> &p);
191 
192  bool loadTautomerData(const std::string &path); // file path
193  void parseTautomerData(const std::vector<std::string> &smartsFrom,
194  const std::vector<std::string> &smartsTo);
195  void setTautomerData(const std::vector<ROMOL_SPTR> &from,
196  const std::vector<ROMOL_SPTR> &to);
197  bool loadChargeDataTables(const std::string &path); // file path
198 };
199 
200 RDKIT_STRUCTCHECKER_EXPORT bool parseOptionsJSON(const std::string &json, StructCheckerOptions &op);
201 
204  const std::string &augmentedAtomTranslationsFile = "",
205  // ?? AcidicAtoms;
206  // ?? GoodAtoms;
207  const std::string &patternFile = "", // file with clean patterns
208  const std::string &rotatePatternFile = "", // file with rotate patterns
209  const std::string &stereoPatternFile = "", // file with stereo patterns
210  const std::string &tautomerFile = "");
211 
212 //! \brief Class for performing structure validation and cleanup
213 /*! \b NOTE: This class should be considered beta. The API may change in future
214 releases.
215 
216 Examples of Usage
217 
218 \code
219  StructChecker chk;
220  unsigned flags = chk.checkMolStructure( mol ); // use defaults
221 \endcode
222 
223 or
224 
225 \code
226  StructureCheck::StructCheckerOptions options; // use defaults
227  // To use external data
228  StructureCheck::loadOptionsFromFiles(options, file1, file2);
229  StructChecker chk(options);
230 
231  for( mol in mols ) {
232  unsigned flags = chk.checkMolStructure( mol );
233  if (0!=(flags & StructureCheck::StructChecker::BAD_SET)) {
234  // write to error file
235  } else if (0!=(flags & StructureCheck::StructChecker::TRANSFORMED_SET))
236 {
237  // input molecule was transformed
238  } else { // flag == NO_CHANGE
239  // no change
240  }
241  }
242 \endcode
243 */
245  public:
// Bit flags describing the outcome of a structure check. Every non-mask
// enumerator uses a distinct bit, so several flags can be OR-ed together in
// one unsigned value.
246  typedef enum StructureFlags {
247  NO_CHANGE = 0,
248  BAD_MOLECULE = 0x0001,
249  ALIAS_CONVERSION_FAILED = 0x0002,
250  STEREO_ERROR = 0x0004,
251  STEREO_FORCED_BAD = 0x0008,
252  ATOM_CLASH = 0x0010,
253  ATOM_CHECK_FAILED = 0x0020,
254  SIZE_CHECK_FAILED = 0x0040,
255  // reserved error = 0x0080,
256  TRANSFORMED = 0x0100,
257  FRAGMENTS_FOUND = 0x0200,
258  EITHER_WARNING = 0x0400,
259  DUBIOUS_STEREO_REMOVED = 0x0800,
260  RECHARGED = 0x1000,
261  STEREO_TRANSFORMED = 0x2000,
262  TEMPLATE_TRANSFORMED = 0x4000,
263  TAUTOMER_TRANSFORMED = 0x8000,
264  // masks:
// BAD_SET: union of the flags in bits 0x0001..0x0040 (0x0080 is reserved)
265  BAD_SET = (BAD_MOLECULE | ALIAS_CONVERSION_FAILED | STEREO_ERROR |
266  STEREO_FORCED_BAD | ATOM_CLASH | ATOM_CHECK_FAILED |
267  SIZE_CHECK_FAILED),
268 
// TRANSFORMED_SET: union of the flags in bits 0x0100..0x8000
269  TRANSFORMED_SET = (TRANSFORMED | FRAGMENTS_FOUND | EITHER_WARNING |
270  DUBIOUS_STEREO_REMOVED | STEREO_TRANSFORMED |
271  TEMPLATE_TRANSFORMED | TAUTOMER_TRANSFORMED | RECHARGED),
272  } StructureFlags;
273  // attributes:
274  private:
275  StructCheckerOptions Options;
276 
277  public:
// Default constructor: the Options member is default-constructed.
278  inline StructChecker() {}
// Constructs a checker that stores its own copy of the given options.
279  inline StructChecker(const StructCheckerOptions &options)
280  : Options(options) {}
281 
// Returns a const reference to the options stored in this checker.
282  const StructCheckerOptions &GetOptions() const { return Options; }
// Replaces the stored options with a copy of the given ones.
283  void SetOptions(const StructCheckerOptions &options) { Options = options; }
284 
285  // Check and fix (if needed) the molecule structure and return a set of
286  // StructureFlags
287  // that describes what has been done
288  unsigned checkMolStructure(RWMol &mol) const;
289 
290  // instance-independent helper methods:
291  // Converts structure property flags to a comma-separated string
292  static std::string StructureFlagsToString(unsigned flags);
293  // Converts a comma-separated string to a StructureFlag unsigned integer
294  static unsigned StringToStructureFlags(const std::string &str);
295  // internal implementation:
296  private:
297 };
298 }
299 }
300 #endif
Class for performing structure validation and cleanup.
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
std::vector< std::pair< AugmentedAtom, AugmentedAtom > > AugmentedAtomPairs
RDKIT_STRUCTCHECKER_EXPORT bool loadOptionsFromFiles(StructCheckerOptions &op, const std::string &augmentedAtomTranslationsFile="", const std::string &patternFile="", const std::string &rotatePatternFile="", const std::string &stereoPatternFile="", const std::string &tautomerFile="")
AugmentedAtom(const std::string &symbol, const std::string &name, int charge, RadicalType radical, AATopology topology)
Definition: StructChecker.h:87
RDKIT_STRUCTCHECKER_EXPORT bool parseOptionsJSON(const std::string &json, StructCheckerOptions &op)
StructChecker(const StructCheckerOptions &options)
std::vector< AugmentedAtom > GoodAtoms
static const int ANY_CHARGE
Definition: StructChecker.h:35
Std stuff.
Definition: Atom.h:30
std::vector< ROMOL_SPTR > StereoPatterns
std::map< unsigned, double > ElnegTable
#define RDKIT_STRUCTCHECKER_EXPORT
Definition: export.h:606
const StructCheckerOptions & GetOptions() const
void SetOptions(const StructCheckerOptions &options)
std::vector< ROMOL_SPTR > RotatePatterns
std::vector< AugmentedAtom > AcidicAtoms