RDKit
Open-source cheminformatics and machine learning.
RDKitFPGenerator.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2018 Boran Adas, Google Summer of Code
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef RD_RDFINGERPRINTGEN_H_2018_07
13 #define RD_RDFINGERPRINTGEN_H_2018_07
14 
16 
17 namespace RDKit {
18 namespace RDKitFP {
19 
20 template <typename OutputType>
22  : public FingerprintArguments<OutputType> {
23  public:
24  const unsigned int d_minPath;
25  const unsigned int d_maxPath;
26  const bool df_useHs;
27  const bool df_branchedPaths;
28  const bool df_useBondOrder;
29 
30  OutputType getResultSize() const;
31 
32  std::string infoString() const;
33 
34  /**
35  \brief Construct a new RDKitFPArguments object
36 
37  \param minPath the minimum path length (in bonds) to be included
38  \param maxPath the maximum path length (in bonds) to be included
39  \param useHs toggles inclusion of Hs in paths (if the molecule has
40  explicit Hs)
41  \param branchedPaths toggles generation of branched subgraphs, not just
42  linear paths
43  \param useBondOrder toggles inclusion of bond orders in the path hashes
44  \param countSimulation if set, use count simulation while
45  generating the fingerprint
46  \param countBounds boundaries for count simulation, corresponding bit will
47  be set if the count is higher than the number provided for that spot
48  \param fpSize size of the generated fingerprint, does not affect the sparse
49  versions
50 
51  */
52  RDKitFPArguments(unsigned int minPath, unsigned int maxPath, bool useHs,
53  bool branchedPaths, bool useBondOrder,
54  const bool countSimulation,
55  const std::vector<std::uint32_t> countBounds,
56  const std::uint32_t fpSize);
57 };
58 
60  : public AtomInvariantsGenerator {
61  public:
62  std::vector<std::uint32_t> *getAtomInvariants(const ROMol &mol) const;
63 
64  std::string infoString() const;
65  RDKitFPAtomInvGenerator *clone() const;
66 };
67 
68 template <typename OutputType>
70  : public AtomEnvironment<OutputType> {
71  const OutputType d_bitId;
72  const boost::dynamic_bitset<> d_atomsInPath;
73 
74  public:
75  OutputType getBitId(FingerprintArguments<OutputType> *arguments,
76  const std::vector<std::uint32_t> *atomInvariants,
77  const std::vector<std::uint32_t> *bondInvariants,
78  const AdditionalOutput *additionalOutput,
79  const bool hashResults = false) const;
80 
81  /**
82  \brief Construct a new RDKitFPAtomEnv object
83 
84  \param bitId bitId generated for this environment
85  \param atomsInPath holds atoms in this environment to set additional output
86 
87  */
88  RDKitFPAtomEnv(const OutputType bitId,
89  const boost::dynamic_bitset<> atomsInPath);
90 };
91 
92 template <typename OutputType>
94  : public AtomEnvironmentGenerator<OutputType> {
95  public:
96  std::vector<AtomEnvironment<OutputType> *> getEnvironments(
97  const ROMol &mol, FingerprintArguments<OutputType> *arguments,
98  const std::vector<std::uint32_t> *fromAtoms,
99  const std::vector<std::uint32_t> *ignoreAtoms, const int confId,
100  const AdditionalOutput *additionalOutput,
101  const std::vector<std::uint32_t> *atomInvariants,
102  const std::vector<std::uint32_t> *bondInvariants,
103  const bool hashResults = false) const;
104 
105  std::string infoString() const;
106 };
107 
108 /**
109  \brief Get a RDKit fingerprint generator with given parameters
110 
111  \tparam OutputType determines the size of the bitIds and the result; can be a
112  32- or 64-bit unsigned integer
113  \param minPath the minimum path length (in bonds) to be included
114  \param maxPath the maximum path length (in bonds) to be included
115  \param useHs toggles inclusion of Hs in paths (if the molecule has
116  explicit Hs)
117  \param branchedPaths toggles generation of branched subgraphs, not just
118  linear paths
119  \param useBondOrder toggles inclusion of bond orders in the path hashes
120  \param atomInvariantsGenerator custom atom invariants generator to use
121  \param countSimulation if set, use count simulation while
122  generating the fingerprint
123  \param countBounds boundaries for count simulation, corresponding bit will be
124  set if the count is higher than the number provided for that spot
125  \param fpSize size of the generated fingerprint, does not affect the sparse
126  versions
127  \param ownsAtomInvGen if set, the atom invariants generator is destroyed with the
128  fingerprint generator
129 
130  \return FingerprintGenerator<OutputType>* that generates RDKit fingerprints
131  */
132 template <typename OutputType>
134  const unsigned int minPath = 1, const unsigned int maxPath = 7,
135  const bool useHs = true, const bool branchedPaths = true,
136  const bool useBondOrder = true,
137  AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
138  const bool countSimulation = true,
139  const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
140  const std::uint32_t fpSize = 2048, const bool ownsAtomInvGen = false);
141 
142 } // namespace RDKitFP
143 } // namespace RDKit
144 
145 #endif
abstract base class that holds atom-environments that will be hashed to generate the fingerprint ...
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getRDKitFPGenerator(const unsigned int minPath=1, const unsigned int maxPath=7, const bool useHs=true, const bool branchedPaths=true, const bool useBondOrder=true, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, const bool countSimulation=true, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, const std::uint32_t fpSize=2048, const bool ownsAtomInvGen=false)
Get a RDKit fingerprint generator with given parameters.
abstract base class for atom invariants generators
abstract base class that generates atom-environments from a molecule
Std stuff.
Definition: Atom.h:30
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:229
class that generates the same fingerprint style for different output formats
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...