RDKit
Open-source cheminformatics and machine learning.
Fingerprints.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2003-2012 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef _RD_FINGERPRINTS_H_
12 #define _RD_FINGERPRINTS_H_
13 
14 #include <vector>
15 #include <boost/cstdint.hpp>
17 
18 class ExplicitBitVect;
19 namespace RDKit {
20 class ROMol;
21 
22 //! \brief Generates a topological (Daylight like) fingerprint for a molecule
23 //! using an alternate (faster) hashing algorithm
24 /*!
25 
26  \param mol: the molecule to be fingerprinted
27  \param minPath: the minimum path length (in bonds) to be included
28  \param maxPath: the maximum path length (in bonds) to be included
29  \param fpSize: the size of the fingerprint
30  \param nBitsPerHash: the number of bits to be set by each path
31  \param useHs: toggles inclusion of Hs in paths (if the molecule has
32  explicit Hs)
33  \param tgtDensity: if the generated fingerprint is below this density, it
34  will
35  be folded until the density is reached.
36  \param minSize: the minimum size to which the fingerprint will be
37  folded
38  \param branchedPaths: toggles generation of branched subgraphs, not just
39  linear paths
40  \param useBondOrder: toggles inclusion of bond orders in the path hashes
41  \param atomInvariants: a vector of atom invariants to use while hashing the
42  paths
43  \param fromAtoms: only paths starting at these atoms will be included
44  \param atomBits: used to return the bits that each atom is involved in
45  (should be at least \c mol.getNumAtoms() long)
46 
47  \return the molecular fingerprint, as an ExplicitBitVect
48 
49  <b>Notes:</b>
50  - the caller is responsible for <tt>delete</tt>ing the result
51 
52 */
54  const ROMol &mol, unsigned int minPath = 1, unsigned int maxPath = 7,
55  unsigned int fpSize = 2048, unsigned int nBitsPerHash = 2,
56  bool useHs = true, double tgtDensity = 0.0, unsigned int minSize = 128,
57  bool branchedPaths = true, bool useBondOrder = true,
58  std::vector<boost::uint32_t> *atomInvariants = 0,
59  const std::vector<boost::uint32_t> *fromAtoms = 0,
60  std::vector<std::vector<boost::uint32_t> > *atomBits = 0,
61  std::map<boost::uint32_t,std::vector<std::vector<int> > > *bitInfo=0);
62 const std::string RDKFingerprintMolVersion = "2.0.0";
63 
64 //! \brief Generates a topological (Daylight like) fingerprint for a molecule
65 //! using a layer-based hashing algorithm
66 /*!
67 
68  <b>Experimental:</b> This function is experimental. The API or results may
69  change from
70  release to release.
71 
72  \param mol: the molecule to be fingerprinted
73  \param layerFlags: the layers to be included (see below)
74  \param minPath: the minimum path length (in bonds) to be included
75  \param maxPath: the maximum path length (in bonds) to be included
76  \param fpSize: the size of the fingerprint
77  \param atomCounts: if provided, this will be used to provide the count of
78  the number
79  of paths that set bits each atom is involved in. The
80  vector should
81  have at least as many entries as the molecule has atoms
82  and is not
83  zeroed out here.
84  \param setOnlyBits: if provided, only bits that are set in this bit vector
85  will be set
86  in the result. This is essentially the same as doing:
87  (*res) &= (*setOnlyBits);
88  but also has an impact on the atomCounts (if being used)
89  \param branchedPaths: toggles generation of branched subgraphs, not just
90  linear paths
91 
92  \return the molecular fingerprint, as an ExplicitBitVect
93 
94  <b>Notes:</b>
95  - the caller is responsible for <tt>delete</tt>ing the result
96 
97  <b>Layer definitions:</b>
98  - 0x01: pure topology
99  - 0x02: bond order
100  - 0x04: atom types
101  - 0x08: presence of rings
102  - 0x10: ring sizes
103  - 0x20: aromaticity
104 */
106  const ROMol &mol, unsigned int layerFlags = 0xFFFFFFFF,
107  unsigned int minPath = 1, unsigned int maxPath = 7,
108  unsigned int fpSize = 2048, std::vector<unsigned int> *atomCounts = 0,
109  ExplicitBitVect *setOnlyBits = 0, bool branchedPaths = true,
110  const std::vector<boost::uint32_t> *fromAtoms = 0);
111 const unsigned int maxFingerprintLayers = 10;
112 const std::string LayeredFingerprintMolVersion = "0.7.0";
113 const unsigned int substructLayers = 0x07;
114 
115 //! \brief Generates a topological fingerprint for a molecule
116 //! using a series of pre-defined structural patterns
117 /*!
118 
119  <b>Experimental:</b> This function is experimental. The API or results may
120  change from
121  release to release.
122 
123  \param mol: the molecule to be fingerprinted
124  \param fpSize: the size of the fingerprint
125  \param atomCounts: if provided, this will be used to provide the count of
126  the number
127  of paths that set bits each atom is involved in. The
128  vector should
129  have at least as many entries as the molecule has atoms
130  and is not
131  zeroed out here.
132  \param setOnlyBits: if provided, only bits that are set in this bit vector
133  will be set
134  in the result. This is essentially the same as doing:
135  (*res) &= (*setOnlyBits);
136  but also has an impact on the atomCounts (if being used)
137 
138  \return the molecular fingerprint, as an ExplicitBitVect
139 
140  <b>Notes:</b>
141  - the caller is responsible for <tt>delete</tt>ing the result
142 
143 */
145  const ROMol &mol, unsigned int fpSize = 2048,
146  std::vector<unsigned int> *atomCounts = 0,
147  ExplicitBitVect *setOnlyBits = 0);
148 
150  unsigned int maxPath=7,
151  bool useHs=true,
152  bool branchedPaths=true,
153  bool useBondOrder=true,
154  std::vector<boost::uint32_t> *atomInvariants=0,
155  const std::vector<boost::uint32_t> *fromAtoms=0,
156  std::vector<std::vector<boost::uint64_t> > *atomBits=0,
157  std::map<boost::uint64_t,std::vector<std::vector<int> > > *bitInfo=0);
158 
159 }
160 
161 
162 #endif
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * PatternFingerprintMol(const ROMol &mol, unsigned int fpSize=2048, std::vector< unsigned int > *atomCounts=0, ExplicitBitVect *setOnlyBits=0)
Generates a topological fingerprint for a molecule using a series of pre-defined structural patterns...
const unsigned int maxFingerprintLayers
Definition: Fingerprints.h:111
const unsigned int substructLayers
Definition: Fingerprints.h:113
const std::string RDKFingerprintMolVersion
Definition: Fingerprints.h:62
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * RDKFingerprintMol(const ROMol &mol, unsigned int minPath=1, unsigned int maxPath=7, unsigned int fpSize=2048, unsigned int nBitsPerHash=2, bool useHs=true, double tgtDensity=0.0, unsigned int minSize=128, bool branchedPaths=true, bool useBondOrder=true, std::vector< boost::uint32_t > *atomInvariants=0, const std::vector< boost::uint32_t > *fromAtoms=0, std::vector< std::vector< boost::uint32_t > > *atomBits=0, std::map< boost::uint32_t, std::vector< std::vector< int > > > *bitInfo=0)
Generates a topological (Daylight like) fingerprint for a molecule using an alternate (faster) hashin...
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * LayeredFingerprintMol(const ROMol &mol, unsigned int layerFlags=0xFFFFFFFF, unsigned int minPath=1, unsigned int maxPath=7, unsigned int fpSize=2048, std::vector< unsigned int > *atomCounts=0, ExplicitBitVect *setOnlyBits=0, bool branchedPaths=true, const std::vector< boost::uint32_t > *fromAtoms=0)
Generates a topological (Daylight like) fingerprint for a molecule using a layer-based hashing algori...
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< boost::uint64_t > * getUnfoldedRDKFingerprintMol(const ROMol &mol, unsigned int minPath=1, unsigned int maxPath=7, bool useHs=true, bool branchedPaths=true, bool useBondOrder=true, std::vector< boost::uint32_t > *atomInvariants=0, const std::vector< boost::uint32_t > *fromAtoms=0, std::vector< std::vector< boost::uint64_t > > *atomBits=0, std::map< boost::uint64_t, std::vector< std::vector< int > > > *bitInfo=0)
Std stuff.
Definition: Atom.h:30
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:229
a class for efficiently storing sparse vectors of ints
Definition: SparseIntVect.h:28
const std::string LayeredFingerprintMolVersion
Definition: Fingerprints.h:112
a class for bit vectors that are densely occupied