RDKit
Open-source cheminformatics and machine learning.
Fingerprints.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2003-2012 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef _RD_FINGERPRINTS_H_
11 #define _RD_FINGERPRINTS_H_
12 
13 #include <vector>
14 #include <boost/cstdint.hpp>
15 
16 class ExplicitBitVect;
17 namespace RDKit{
18  class ROMol;
19 
20  //! \brief Generates a topological (Daylight like) fingerprint for a molecule
21  //! using an alternate (faster) hashing algorithm
22  /*!
23 
24  \param mol: the molecule to be fingerprinted
25  \param minPath: the minimum path length (in bonds) to be included
26  \param maxPath: the maximum path length (in bonds) to be included
27  \param fpSize: the size of the fingerprint
28  \param nBitsPerHash: the number of bits to be set by each path
29  \param useHs: toggles inclusion of Hs in paths (if the molecule has explicit Hs)
30  \param tgtDensity: if the generated fingerprint is below this density, it will
31  be folded until the density is reached.
32  \param minSize: the minimum size to which the fingerprint will be
33  folded
34  \param branchedPaths: toggles generation of branched subgraphs, not just linear paths
35  \param useBondOrder: toggles inclusion of bond orders in the path hashes
36  \param atomInvariants: a vector of atom invariants to use while hashing the paths
37  \param fromAtoms: only paths starting at these atoms will be included
38  \param atomBits: used to return the bits that each atom is involved in
39  (should be at least \c mol.numAtoms long)
40 
41  \return the molecular fingerprint, as an ExplicitBitVect
42 
43  <b>Notes:</b>
44  - the caller is responsible for <tt>delete</tt>ing the result
45 
46  */
47  ExplicitBitVect *RDKFingerprintMol(const ROMol &mol,
48  unsigned int minPath=1,
49  unsigned int maxPath=7,
50  unsigned int fpSize=2048,
51  unsigned int nBitsPerHash=2,
52  bool useHs=true,
53  double tgtDensity=0.0,
54  unsigned int minSize=128,
55  bool branchedPaths=true,
56  bool useBondOrder=true,
57  std::vector<boost::uint32_t> *atomInvariants=0,
58  const std::vector<boost::uint32_t> *fromAtoms=0,
59  std::vector<std::vector<boost::uint32_t> > *atomBits=0
60  );
61  const std::string RDKFingerprintMolVersion="2.0.0";
62 
63 
64  //! \brief Generates a topological (Daylight like) fingerprint for a molecule
65  //! using a layer-based hashing algorithm
66  /*!
67 
68  <b>Experimental:</b> This function is experimental. The API or results may change from
69  release to release.
70 
71  \param mol: the molecule to be fingerprinted
72  \param layerFlags: the layers to be included (see below)
73  \param minPath: the minimum path length (in bonds) to be included
74  \param maxPath: the maximum path length (in bonds) to be included
75  \param fpSize: the size of the fingerprint
76  \param atomCounts: if provided, this will be used to provide the count of the number
77  of paths that set bits each atom is involved in. The vector should
78  have at least as many entries as the molecule has atoms and is not
79  zeroed out here.
80  \param setOnlyBits: if provided, only bits that are set in this bit vector will be set
81  in the result. This is essentially the same as doing:
82  (*res) &= (*setOnlyBits);
83  but also has an impact on the atomCounts (if being used)
84  \param branchedPaths: toggles generation of branched subgraphs, not just linear paths
85 
86  \return the molecular fingerprint, as an ExplicitBitVect
87 
88  <b>Notes:</b>
89  - the caller is responsible for <tt>delete</tt>ing the result
90 
91  <b>Layer definitions:</b>
92  - 0x01: pure topology
93  - 0x02: bond order
94  - 0x04: atom types
95  - 0x08: presence of rings
96  - 0x10: ring sizes
97  - 0x20: aromaticity
98  */
100  unsigned int layerFlags=0xFFFFFFFF,
101  unsigned int minPath=1,unsigned int maxPath=7,
102  unsigned int fpSize=2048,
103  std::vector<unsigned int> *atomCounts=0,
104  ExplicitBitVect *setOnlyBits=0,
105  bool branchedPaths=true,
106  const std::vector<boost::uint32_t> *fromAtoms=0
107  );
108  const unsigned int maxFingerprintLayers=10;  // NOTE(review): larger than the six layer flags (0x01-0x20) documented above; presumably an upper bound on the layer count - confirm
109  const std::string LayeredFingerprintMolVersion="0.7.0";
110  const unsigned int substructLayers=0x07;  // 0x01|0x02|0x04: the pure-topology, bond-order and atom-type layers from the layer definitions above
111 
112  //! \brief Generates a topological fingerprint for a molecule
113  //! using a series of pre-defined structural patterns
114  /*!
115 
116  <b>Experimental:</b> This function is experimental. The API or results may change from
117  release to release.
118 
119  \param mol: the molecule to be fingerprinted
120  \param fpSize: the size of the fingerprint
121  \param atomCounts: if provided, this will be used to provide the count of the number
122  of paths that set bits each atom is involved in. The vector should
123  have at least as many entries as the molecule has atoms and is not
124  zeroed out here.
125  \param setOnlyBits: if provided, only bits that are set in this bit vector will be set
126  in the result. This is essentially the same as doing:
127  (*res) &= (*setOnlyBits);
128  but also has an impact on the atomCounts (if being used)
129 
130  \return the molecular fingerprint, as an ExplicitBitVect
131 
132  <b>Notes:</b>
133  - the caller is responsible for <tt>delete</tt>ing the result
134 
135  */
137  unsigned int fpSize=2048,
138  std::vector<unsigned int> *atomCounts=0,
139  ExplicitBitVect *setOnlyBits=0);
140 
141  namespace Fingerprints {
142  namespace detail {  // helper predicates; only prototypes appear in this header
143  bool isComplexQuery(const Bond *b);  // NOTE(review): presumably true when the bond carries a non-trivial query - confirm against the definition
144  bool isComplexQuery(const Atom *a);  // atom overload of the predicate above
145  bool isAtomAromatic(const Atom *a);  // NOTE(review): presumably reports the atom's aromaticity for fingerprinting purposes - confirm
146  }
147  }
148 }
149 
150 #endif
ExplicitBitVect * LayeredFingerprintMol(const ROMol &mol, unsigned int layerFlags=0xFFFFFFFF, unsigned int minPath=1, unsigned int maxPath=7, unsigned int fpSize=2048, std::vector< unsigned int > *atomCounts=0, ExplicitBitVect *setOnlyBits=0, bool branchedPaths=true, const std::vector< boost::uint32_t > *fromAtoms=0)
Generates a topological (Daylight like) fingerprint for a molecule using a layer-based hashing algori...
ExplicitBitVect * RDKFingerprintMol(const ROMol &mol, unsigned int minPath=1, unsigned int maxPath=7, unsigned int fpSize=2048, unsigned int nBitsPerHash=2, bool useHs=true, double tgtDensity=0.0, unsigned int minSize=128, bool branchedPaths=true, bool useBondOrder=true, std::vector< boost::uint32_t > *atomInvariants=0, const std::vector< boost::uint32_t > *fromAtoms=0, std::vector< std::vector< boost::uint32_t > > *atomBits=0)
Generates a topological (Daylight like) fingerprint for a molecule using an alternate (faster) hashin...
const unsigned int maxFingerprintLayers
Definition: Fingerprints.h:108
const unsigned int substructLayers
Definition: Fingerprints.h:110
const std::string RDKFingerprintMolVersion
Definition: Fingerprints.h:61
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:105
Definition: types.h:23
ExplicitBitVect * PatternFingerprintMol(const ROMol &mol, unsigned int fpSize=2048, std::vector< unsigned int > *atomCounts=0, ExplicitBitVect *setOnlyBits=0)
Generates a topological fingerprint for a molecule using a series of pre-defined structural patterns...
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
bool isAtomAromatic(const Atom *a)
class for representing a bond
Definition: Bond.h:46
bool isComplexQuery(const Bond *b)
const std::string LayeredFingerprintMolVersion
Definition: Fingerprints.h:109
a class for bit vectors that are densely occupied
The class for representing atoms.
Definition: Atom.h:67