RDKit
Open-source cheminformatics and machine learning.
FingerprintUtil.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2018 Boran Adas, Google Summer of Code
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef RD_FINGERPRINTUTIL_H_2018_07
13 #define RD_FINGERPRINTUTIL_H_2018_07
14 
15 #include <GraphMol/RDKitBase.h>
17 #include <DataStructs/BitVects.h>
18 #include <boost/cstdint.hpp>
19 
20 #include <vector>
21 #include <map>
23 
25 
26 namespace RDKit {
27 namespace AtomPairs {
28 const unsigned int numTypeBits = 4;  // sizes atomNumberTypes (1 << 4 == 16 entries) and is one term of codeSize
29 const unsigned int atomNumberTypes[1 << numTypeBits] = {  // 16-element lookup table; values are presumably atomic numbers - TODO confirm
30  5, 6, 7, 8, 9, 14, 15, 16, 17, 33, 34, 35, 51, 52, 43};  // only 15 initializers are listed, so the 16th element is zero-initialized
31 const unsigned int numPiBits = 2;  // one term of codeSize
32 const unsigned int maxNumPi = (1 << numPiBits) - 1;  // == 3, the largest value that fits in numPiBits bits
33 const unsigned int numBranchBits = 3;  // one term of codeSize
34 const unsigned int maxNumBranches = (1 << numBranchBits) - 1;  // == 7, the largest value that fits in numBranchBits bits
35 const unsigned int numChiralBits = 2;  // NOT added into codeSize below
36 const unsigned int codeSize = numTypeBits + numPiBits + numBranchBits;  // == 4 + 2 + 3 == 9
37 const unsigned int numPathBits = 5;  // one term of numAtomPairFingerprintBits
38 const unsigned int maxPathLen = (1 << numPathBits) - 1;  // == 31, the largest value that fits in numPathBits bits
39 const unsigned int numAtomPairFingerprintBits =  // == 5 + 2 * 9 == 23 with the values above
40  numPathBits + 2 * codeSize; // note that this is only accurate if chirality
41  // is not included (numChiralBits is not part of codeSize)
42 
43 //! returns a numeric code for the atom (the atom's hash in the
44 //! atom-pair scheme)
45 /*!
46  \param atom the atom to be considered
47  \param branchSubtract (optional) a constant to subtract from
48  the number of neighbors when the hash
49  is calculated (used in the topological
50  torsions code)
51  \param includeChirality toggles the inclusion of bits indicating R/S
52  chirality
53 */
55  const Atom *atom, unsigned int branchSubtract = 0,
56  bool includeChirality = false);
57 
58 //! returns an atom pair hash based on two atom hashes and the
59 //! distance between the atoms.
60 /*!
61  \param codeI the hash for the first atom
62  \param codeJ the hash for the second atom
63  \param dist the distance (number of bonds) between the two
64  atoms
65  \param includeChirality toggles the inclusion of bits indicating R/S
66  chirality
67 */
69  std::uint32_t codeI, std::uint32_t codeJ, unsigned int dist,
70  bool includeChirality = false);
71 
72 //! returns a topological torsion hash based on the atom hashes
73 //! passed in
74 /*!
75  \param atomCodes the vector of atom hashes
76 */
78  const std::vector<std::uint32_t> &atomCodes, bool includeChirality = false);
79 
81  const std::vector<std::uint32_t> &pathCodes);
82 } // namespace AtomPairs
83 
84 namespace MorganFingerprints {
85 
87  public:
88  ss_matcher();
89  ss_matcher(const std::string &pattern);
90 
91  // const RDKit::ROMOL_SPTR &getMatcher() const { return m_matcher; };
92  const RDKit::ROMol *getMatcher() const;
93 
94  private:
95  RDKit::ROMOL_SPTR m_matcher;
96 };
97 
98 typedef boost::tuple<boost::dynamic_bitset<>, uint32_t, unsigned int>
100 
101 RDKIT_FINGERPRINTS_EXPORT extern std::vector<std::string> defaultFeatureSmarts;
102 
103 //! returns the connectivity invariants for a molecule
104 /*!
105 
106  \param mol : the molecule to be considered
107  \param invars : used to return the results
108  \param includeRingMembership : if set, whether or not the atom is in
109  a ring will be used in the invariant list.
110 */
112  const ROMol &mol, std::vector<boost::uint32_t> &invars,
113  bool includeRingMembership = true);
114 const std::string morganConnectivityInvariantVersion = "1.0.0";  // namespace-scope const: internal linkage, so each including translation unit gets its own copy; presumably versions the getConnectivityInvariants() scheme - TODO confirm
115 
116 //! returns the feature invariants for a molecule
117 /*!
118 
119  \param mol : the molecule to be considered
120  \param invars : used to return the results
121  \param patterns : if provided, should contain the queries used to assign
122  atom-types.
123  If not provided, feature definitions adapted from the reference
124  Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998)
125  will be used for Donor, Acceptor, Aromatic, Halogen, Basic, and
126  Acidic
127 
128 */
130  const ROMol &mol, std::vector<boost::uint32_t> &invars,
131  std::vector<const ROMol *> *patterns = 0);
132 const std::string morganFeatureInvariantVersion = "0.1.0";  // namespace-scope const: internal linkage, so each including translation unit gets its own copy; presumably versions the getFeatureInvariants() scheme - TODO confirm
133 
134 } // namespace MorganFingerprints
135 
136 namespace RDKitFPUtils {
137 
139  const ROMol &mol, std::vector<boost::uint32_t> &lAtomInvariants);
140 
142  const ROMol &mol, std::map<int, std::list<std::vector<int>>> &allPaths,
143  const std::vector<boost::uint32_t> *fromAtoms, bool branchedPaths,
144  bool useHs, unsigned int minPath, unsigned int maxPath);
145 
147  const ROMol &mol, std::vector<const Bond *> &bondCache,
148  std::vector<short> &isQueryBond);
149 
150 RDKIT_FINGERPRINTS_EXPORT std::vector<unsigned int> generateBondHashes(  // returns a std::vector<unsigned int> by value; presumably one hash per bond of `path` - TODO confirm against the implementation
151  const ROMol &mol, boost::dynamic_bitset<> &atomsInPath,  // mol is read-only; atomsInPath is a non-const reference, so the callee may modify it
152  const std::vector<const Bond *> &bondCache,  // read-only; same type as the bondCache output parameter of identifyQueryBonds()
153  const std::vector<short> &isQueryBond, const std::vector<int> &path,  // both read-only; isQueryBond has the same type as the identifyQueryBonds() output; path presumably holds bond indices - TODO confirm
154  bool useBondOrder, const std::vector<boost::uint32_t> *atomInvariants);  // atomInvariants is a pointer with no default; NOTE(review): confirm whether a null pointer is accepted
155 
156 } // namespace RDKitFPUtils
157 
158 } // namespace RDKit
159 
160 #endif
boost::tuple< boost::dynamic_bitset<>, uint32_t, unsigned int > AccumTuple
RDKIT_FINGERPRINTS_EXPORT void buildDefaultRDKitFingerprintAtomInvariants(const ROMol &mol, std::vector< boost::uint32_t > &lAtomInvariants)
Pulls in all the BitVect classes.
RDKIT_FINGERPRINTS_EXPORT std::uint32_t getAtomPairCode(std::uint32_t codeI, std::uint32_t codeJ, unsigned int dist, bool includeChirality=false)
RDKIT_FINGERPRINTS_EXPORT void getFeatureInvariants(const ROMol &mol, std::vector< boost::uint32_t > &invars, std::vector< const ROMol *> *patterns=0)
returns the feature invariants for a molecule
RDKIT_FINGERPRINTS_EXPORT void getConnectivityInvariants(const ROMol &mol, std::vector< boost::uint32_t > &invars, bool includeRingMembership=true)
returns the connectivity invariants for a molecule
const unsigned int maxPathLen
const unsigned int numPathBits
const unsigned int maxNumPi
RDKIT_FINGERPRINTS_EXPORT std::vector< unsigned int > generateBondHashes(const ROMol &mol, boost::dynamic_bitset<> &atomsInPath, const std::vector< const Bond *> &bondCache, const std::vector< short > &isQueryBond, const std::vector< int > &path, bool useBondOrder, const std::vector< boost::uint32_t > *atomInvariants)
RDKIT_FINGERPRINTS_EXPORT std::vector< std::string > defaultFeatureSmarts
pulls in the core RDKit functionality
const std::string morganFeatureInvariantVersion
const unsigned int numPiBits
RDKIT_FINGERPRINTS_EXPORT std::uint32_t getAtomCode(const Atom *atom, unsigned int branchSubtract=0, bool includeChirality=false)
const std::string morganConnectivityInvariantVersion
boost::shared_ptr< ROMol > ROMOL_SPTR
Std stuff.
Definition: Atom.h:30
const unsigned int maxNumBranches
const unsigned int numBranchBits
RDKIT_FINGERPRINTS_EXPORT std::uint64_t getTopologicalTorsionCode(const std::vector< std::uint32_t > &atomCodes, bool includeChirality=false)
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:229
const unsigned int numAtomPairFingerprintBits
RDKIT_FINGERPRINTS_EXPORT void identifyQueryBonds(const ROMol &mol, std::vector< const Bond *> &bondCache, std::vector< short > &isQueryBond)
RDKIT_FINGERPRINTS_EXPORT void enumerateAllPaths(const ROMol &mol, std::map< int, std::list< std::vector< int >>> &allPaths, const std::vector< boost::uint32_t > *fromAtoms, bool branchedPaths, bool useHs, unsigned int minPath, unsigned int maxPath)
functionality for finding subgraphs and paths in molecules
const unsigned int codeSize
RDKIT_FINGERPRINTS_EXPORT std::uint32_t getTopologicalTorsionHash(const std::vector< std::uint32_t > &pathCodes)
const unsigned int atomNumberTypes[1<< numTypeBits]
The class for representing atoms.
Definition: Atom.h:69
const unsigned int numChiralBits
const unsigned int numTypeBits