RDKit
Open-source cheminformatics and machine learning.
RDKitFPGenerator.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2018 Boran Adas, Google Summer of Code
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef RD_RDFINGERPRINTGEN_H_2018_07
13 #define RD_RDFINGERPRINTGEN_H_2018_07
14 
16 
17 namespace RDKit {
18 namespace RDKitFP {
19 
20 template <typename OutputType>
22  : public FingerprintArguments<OutputType> {
23  public:
24  const unsigned int d_minPath;
25  const unsigned int d_maxPath;
26  const bool df_useHs;
27  const bool df_branchedPaths;
28  const bool df_useBondOrder;
29 
30  OutputType getResultSize() const;
31 
32  std::string infoString() const;
33 
34  /**
35  \brief Construct a new RDKitFPArguments object
36 
37  \param minPath the minimum path length (in bonds) to be included
38  \param maxPath the maximum path length (in bonds) to be included
39  \param useHs toggles inclusion of Hs in paths (if the molecule has
40  explicit Hs)
41  \param branchedPaths toggles generation of branched subgraphs, not just
42  linear paths
43  \param useBondOrder toggles inclusion of bond orders in the path hashes
44  \param countSimulation if set, use count simulation while
45  generating the fingerprint
46  \param countBounds boundaries for count simulation, corresponding bit will
47  be set if the count is higher than the number provided for that spot
48  \param fpSize size of the generated fingerprint, does not affect the sparse
49  versions
50  \param numBitsPerFeature controls the number of bits that are set for each
51  path/subgraph found
52 
53  */
54  RDKitFPArguments(unsigned int minPath, unsigned int maxPath, bool useHs,
55  bool branchedPaths, bool useBondOrder, bool countSimulation,
56  const std::vector<std::uint32_t> countBounds,
57  std::uint32_t fpSize, std::uint32_t numBitsPerFeature);
58 };
59 
61  : public AtomInvariantsGenerator {
62  public:
63  std::vector<std::uint32_t> *getAtomInvariants(const ROMol &mol) const;
64 
65  std::string infoString() const;
67 };
68 
69 template <typename OutputType>
71  : public AtomEnvironment<OutputType> {
72  const OutputType d_bitId;
73  const boost::dynamic_bitset<> d_atomsInPath;
74 
75  public:
77  const std::vector<std::uint32_t> *atomInvariants,
78  const std::vector<std::uint32_t> *bondInvariants,
79  const AdditionalOutput *additionalOutput,
80  bool hashResults = false) const;
81 
82  /**
83  \brief Construct a new RDKitFPAtomEnv object
84 
85  \param bitId bitId generated for this environment
86  \param atomsInPath holds atoms in this environment to set additional output
87 
88  */
89  RDKitFPAtomEnv(const OutputType bitId, boost::dynamic_bitset<> atomsInPath);
90 };
91 
92 template <typename OutputType>
94  : public AtomEnvironmentGenerator<OutputType> {
95  public:
96  std::vector<AtomEnvironment<OutputType> *> getEnvironments(
97  const ROMol &mol, FingerprintArguments<OutputType> *arguments,
98  const std::vector<std::uint32_t> *fromAtoms,
99  const std::vector<std::uint32_t> *ignoreAtoms, int confId,
100  const AdditionalOutput *additionalOutput,
101  const std::vector<std::uint32_t> *atomInvariants,
102  const std::vector<std::uint32_t> *bondInvariants,
103  bool hashResults = false) const;
104 
105  std::string infoString() const;
106 };
107 
108 /**
109  \brief Get a RDKit fingerprint generator with given parameters
110 
111  \tparam OutputType determines the size of the bitIds and the result, can be 32
112  or 64 bit unsigned integer
113  \param minPath the minimum path length (in bonds) to be included
114  \param maxPath the maximum path length (in bonds) to be included
115  \param useHs toggles inclusion of Hs in paths (if the molecule has
116  explicit Hs)
117  \param branchedPaths toggles generation of branched subgraphs, not just
118  linear paths
119  \param useBondOrder toggles inclusion of bond orders in the path hashes
120  \param atomInvariantsGenerator custom atom invariants generator to use
121  \param countSimulation if set, use count simulation while
122  generating the fingerprint
123  \param countBounds boundaries for count simulation, corresponding bit will be
124  set if the count is higher than the number provided for that spot
125  \param fpSize size of the generated fingerprint, does not affect the sparse
126  versions
127  \param numBitsPerFeature controls the number of bits that are set for each
128  path/subgraph found
129  \param ownsAtomInvGen if set, the atom invariants generator is destroyed
130  with the fingerprint generator
131 
132  \return FingerprintGenerator<OutputType>* that generates RDKit fingerprints
133  */
134 template <typename OutputType>
136  unsigned int minPath = 1, unsigned int maxPath = 7, bool useHs = true,
137  bool branchedPaths = true, bool useBondOrder = true,
138  AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
139  bool countSimulation = false,
140  const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
141  std::uint32_t fpSize = 2048, std::uint32_t numBitsPerFeature = 2,
142  bool ownsAtomInvGen = false);
143 
144 } // namespace RDKitFP
145 } // namespace RDKit
146 
147 #endif
RDKit::RDKitFP::RDKitFPAtomEnv
Definition: RDKitFPGenerator.h:71
RDKit::RDKitFP::RDKitFPAtomInvGenerator::clone
RDKitFPAtomInvGenerator * clone() const
RDKit::FingerprintGenerator
class that generates the same fingerprint style for different output formats
Definition: FingerprintGenerator.h:240
RDKit::RDKitFP
@ RDKitFP
Definition: ReactionFingerprints.h:47
RDKit::RDKitFP::RDKitFPArguments::df_useBondOrder
const bool df_useBondOrder
Definition: RDKitFPGenerator.h:28
RDKit::RDKitFP::RDKitFPEnvGenerator
Definition: RDKitFPGenerator.h:94
RDKit::AdditionalOutput
Definition: FingerprintGenerator.h:23
RDKit::RDKitFP::RDKitFPArguments::df_useHs
const bool df_useHs
Definition: RDKitFPGenerator.h:26
RDKit::RDKitFP::RDKitFPArguments::df_branchedPaths
const bool df_branchedPaths
Definition: RDKitFPGenerator.h:27
RDKit::RDKitFP::RDKitFPAtomEnv::RDKitFPAtomEnv
RDKitFPAtomEnv(const OutputType bitId, boost::dynamic_bitset<> atomsInPath)
Construct a new RDKitFPAtomEnv object.
RDKit::ROMol
Definition: ROMol.h:171
RDKit::RDKitFP::RDKitFPEnvGenerator::getEnvironments
std::vector< AtomEnvironment< OutputType > * > getEnvironments(const ROMol &mol, FingerprintArguments< OutputType > *arguments, const std::vector< std::uint32_t > *fromAtoms, const std::vector< std::uint32_t > *ignoreAtoms, int confId, const AdditionalOutput *additionalOutput, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, bool hashResults=false) const
generate and return all atom-environments from a molecule
RDKit::AtomInvariantsGenerator
abstract base class for atom invariants generators
Definition: FingerprintGenerator.h:178
RDKit::RDKitFP::RDKitFPArguments::d_maxPath
const unsigned int d_maxPath
Definition: RDKitFPGenerator.h:25
RDKit::RDKitFP::RDKitFPArguments::d_minPath
const unsigned int d_minPath
Definition: RDKitFPGenerator.h:24
FingerprintGenerator.h
RDKit::RDKitFP::RDKitFPArguments::infoString
std::string infoString() const
method that returns information string about the fingerprint specific argument set and the arguments ...
RDKit::RDKitFP::getRDKitFPGenerator
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getRDKitFPGenerator(unsigned int minPath=1, unsigned int maxPath=7, bool useHs=true, bool branchedPaths=true, bool useBondOrder=true, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, bool countSimulation=false, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, std::uint32_t fpSize=2048, std::uint32_t numBitsPerFeature=2, bool ownsAtomInvGen=false)
Get a RDKit fingerprint generator with given parameters.
RDKit::RDKitFP::RDKitFPAtomInvGenerator::getAtomInvariants
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const
get atom invariants from a molecule
RDKit::AtomEnvironment
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
Definition: FingerprintGenerator.h:96
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::AtomEnvironmentGenerator
abstract base class that generates atom-environments from a molecule
Definition: FingerprintGenerator.h:124
RDKit::RDKitFP::RDKitFPAtomInvGenerator::infoString
std::string infoString() const
method that returns information about this AtomInvariantsGenerator and its arguments
RDKit::RDKitFP::RDKitFPArguments::getResultSize
OutputType getResultSize() const
Returns the size of the fingerprint based on arguments.
RDKIT_FINGERPRINTS_EXPORT
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:242
RDKit::RDKitFP::RDKitFPArguments::RDKitFPArguments
RDKitFPArguments(unsigned int minPath, unsigned int maxPath, bool useHs, bool branchedPaths, bool useBondOrder, bool countSimulation, const std::vector< std::uint32_t > countBounds, std::uint32_t fpSize, std::uint32_t numBitsPerFeature)
Construct a new RDKitFPArguments object.
RDKit::RDKitFP::RDKitFPEnvGenerator::infoString
std::string infoString() const
method that returns information about this AtomEnvironmentGenerator and its arguments, if any
RDKit::RDKitFP::RDKitFPAtomInvGenerator
Definition: RDKitFPGenerator.h:61
RDKit::RDKitFP::RDKitFPAtomEnv::getBitId
OutputType getBitId(FingerprintArguments< OutputType > *arguments, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, const AdditionalOutput *additionalOutput, bool hashResults=false) const
calculates and returns the bit id to be set for this atom-environment
RDKit::RDKitFP::RDKitFPArguments
Definition: RDKitFPGenerator.h:22
RDKit::FingerprintArguments
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
Definition: FingerprintGenerator.h:53
export.h