RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
AtomPairs.h
Go to the documentation of this file.
1//
2// Copyright (C) 2007-2013 Greg Landrum
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11/*! \file AtomPairs.h
12
13
14 A few quick notes about fingerprint size and the way chirality is handled in
15 these functions.
16
17 By default the atom-pair and topological-torsion fingerprints do not include any
18 information about
19 chirality; the atom invariants only include information about the atomic
20 number,
21 number of pi electrons, and degree.
22 When chirality is included, two additional bits are added to the atom
23 invariants to flag R/S/no
24 chirality. These additional bits change the size of the atom invariants and
25 either the size
26 of the final fingerprint (atom pairs) or the maximum allowed path length
27 (torsions). This means
28 that even fingerprints for achiral molecules are different when
29 includeChirality is true.
30
31*/
32#include <RDGeneral/export.h>
33#ifndef __RD_ATOMPAIRS_H__
34#define __RD_ATOMPAIRS_H__
35
38#include <cstdint>
40namespace RDKit {
41class Atom;
42
43namespace AtomPairs {
44const std::string atomPairsVersion = "1.1.0";
45
46//! returns the atom-pair fingerprint for a molecule
47/*!
48 The algorithm used is described here:
49 R.E. Carhart, D.H. Smith, R. Venkataraghavan; "Atom Pairs as
50 Molecular Features in Structure-Activity Studies: Definition
51 and Applications" JCICS 25, 64-73 (1985).
52
53
54 \param mol: the molecule to be fingerprinted
55 \param minLength: minimum distance between atoms to be
56 considered in a pair. Default is 1 bond.
57 \param maxLength: maximum distance between atoms to be
58 considered in a pair.
59 Default is maxPathLen-1 bonds.
60 \param fromAtoms: if provided, only atom pairs that involve
61 the specified atoms will be included in the
62 fingerprint
63 \param ignoreAtoms: if provided, any atom pairs that include
64 the specified atoms will not be included in the
65 fingerprint
66 \param atomInvariants: a list of invariants to use for the atom hashes
67 note: only the first \c codeSize bits of each
68 invariant are used.
69 \param includeChirality: if set, chirality will be used in the atom invariants
70 (note: this is ignored if atomInvariants are
71 provided)
72 \param use2D: if set, the 2D (topological) distance matrix is used.
73 \param confId: the conformation to use if 3D distances are being used
74
75
76 \return a pointer to the fingerprint. The client is
77 responsible for calling delete on this.
78
79*/
81 const ROMol &mol, unsigned int minLength, unsigned int maxLength,
82 const std::vector<std::uint32_t> *fromAtoms = nullptr,
83 const std::vector<std::uint32_t> *ignoreAtoms = nullptr,
84 const std::vector<std::uint32_t> *atomInvariants = nullptr,
85 bool includeChirality = false, bool use2D = true, int confId = -1);
86//! \overload
88 const ROMol &mol, const std::vector<std::uint32_t> *fromAtoms = nullptr,
89 const std::vector<std::uint32_t> *ignoreAtoms = nullptr,
90 const std::vector<std::uint32_t> *atomInvariants = nullptr,
91 bool includeChirality = false, bool use2D = true, int confId = -1);
92
93//! returns the hashed atom-pair fingerprint for a molecule
94/*!
95 \param mol: the molecule to be fingerprinted
96 \param nBits: the length of the fingerprint to generate
97 \param minLength: minimum distance between atoms to be
98 considered in a pair. Default is 1 bond.
99 \param maxLength: maximum distance between atoms to be
100 considered in a pair.
101 Default is maxPathLen-1 bonds.
102 \param fromAtoms: if provided, only atom pairs that involve
103 the specified atoms will be included in the
104 fingerprint
105 \param ignoreAtoms: if provided, any atom pairs that include
106 the specified atoms will not be included in the
107 fingerprint
108 \param atomInvariants: a list of invariants to use for the atom hashes
109 note: only the first \c codeSize bits of each
110 invariant are used.
111 \param includeChirality: if set, chirality will be used in the atom invariants
112 (note: this is ignored if atomInvariants are
113 provided)
114 \param use2D: if set, the 2D (topological) distance matrix is used.
115 \param confId: the conformation to use if 3D distances are being used
116 \return a pointer to the fingerprint. The client is
117 responsible for calling delete on this.
118
119*/
122 const ROMol &mol, unsigned int nBits = 2048, unsigned int minLength = 1,
123 unsigned int maxLength = maxPathLen - 1,
124 const std::vector<std::uint32_t> *fromAtoms = nullptr,
125 const std::vector<std::uint32_t> *ignoreAtoms = nullptr,
126 const std::vector<std::uint32_t> *atomInvariants = nullptr,
127 bool includeChirality = false, bool use2D = true, int confId = -1);
128//! returns the hashed atom-pair fingerprint for a molecule as a bit vector
129/*!
130 \param mol: the molecule to be fingerprinted
131 \param nBits: the length of the fingerprint to generate
132 \param minLength: minimum distance between atoms to be
133 considered in a pair. Default is 1 bond.
134 \param maxLength: maximum distance between atoms to be
135 considered in a pair.
136 Default is maxPathLen-1 bonds.
137 \param fromAtoms: if provided, only atom pairs that involve
138 the specified atoms will be included in the
139 fingerprint
140 \param ignoreAtoms: if provided, any atom pairs that include
141 the specified atoms will not be included in the
142 fingerprint
143 \param atomInvariants: a list of invariants to use for the atom hashes
144 note: only the first \c codeSize bits of each
145 invariant are used.
146 \param nBitsPerEntry: number of bits to use in simulating counts
147 \param includeChirality: if set, chirality will be used in the atom invariants
148 (note: this is ignored if atomInvariants are
149 provided)
150 \param use2D: if set, the 2D (topological) distance matrix is used.
151 \param confId: the conformation to use if 3D distances are being used
152
153 \return a pointer to the fingerprint. The client is
154 responsible for calling delete on this.
155
156*/
159 const ROMol &mol, unsigned int nBits = 2048, unsigned int minLength = 1,
160 unsigned int maxLength = maxPathLen - 1,
161 const std::vector<std::uint32_t> *fromAtoms = nullptr,
162 const std::vector<std::uint32_t> *ignoreAtoms = nullptr,
163 const std::vector<std::uint32_t> *atomInvariants = nullptr,
164 unsigned int nBitsPerEntry = 4, bool includeChirality = false,
165 bool use2D = true, int confId = -1);
166
167//! returns the topological-torsion fingerprint for a molecule
168/*!
169 The algorithm used is described here:
170 R. Nilakantan, N. Bauman, J. S. Dixon, R. Venkataraghavan;
171 "Topological Torsion: A New Molecular Descriptor for SAR Applications.
172 Comparison with Other Descriptors" JCICS 27, 82-85 (1987).
173
174 \param mol: the molecule to be fingerprinted
175 \param targetSize: the number of atoms to include in the "torsions"
176 \param fromAtoms: if provided, only torsions that start or end at
177 the specified atoms will be included in the
178 fingerprint
179 \param ignoreAtoms: if provided, any torsions that include
180 the specified atoms will not be included in the
181 fingerprint
182 \param atomInvariants: a list of invariants to use for the atom hashes
183 note: only the first \c codeSize bits of each
184 invariant are used.
185 \param includeChirality: if set, chirality will be used in the atom invariants
186 (note: this is ignored if atomInvariants are
187 provided)
188
189 \return a pointer to the fingerprint. The client is
190 responsible for calling delete on this.
191
192*/
195 const ROMol &mol, unsigned int targetSize = 4,
196 const std::vector<std::uint32_t> *fromAtoms = nullptr,
197 const std::vector<std::uint32_t> *ignoreAtoms = nullptr,
198 const std::vector<std::uint32_t> *atomInvariants = nullptr,
199 bool includeChirality = false);
200//! returns a hashed topological-torsion fingerprint for a molecule
201/*!
202 The algorithm used is described here:
203 R. Nilakantan, N. Bauman, J. S. Dixon, R. Venkataraghavan;
204 "Topological Torsion: A New Molecular Descriptor for SAR Applications.
205 Comparison with Other Descriptors" JCICS 27, 82-85 (1987).
206
207 \param mol: the molecule to be fingerprinted
208 \param nBits: number of bits to include in the fingerprint
209 \param targetSize: the number of atoms to include in the "torsions"
210 \param fromAtoms: if provided, only torsions that start or end at
211 the specified atoms will be included in the
212 fingerprint
213 \param ignoreAtoms: if provided, any torsions that include
214 the specified atoms will not be included in the
215 fingerprint
216 \param atomInvariants: a list of invariants to use for the atom hashes
217 note: only the first \c codeSize bits of each
218 invariant are used.
219 \param includeChirality: if set, chirality will be used in the atom invariants
220 (note: this is ignored if atomInvariants are
221 provided)
222
223 \return a pointer to the fingerprint. The client is
224 responsible for calling delete on this.
225
226*/
229 const ROMol &mol, unsigned int nBits = 2048, unsigned int targetSize = 4,
230 const std::vector<std::uint32_t> *fromAtoms = nullptr,
231 const std::vector<std::uint32_t> *ignoreAtoms = nullptr,
232 const std::vector<std::uint32_t> *atomInvariants = nullptr,
233 bool includeChirality = false);
234//! returns a hashed topological-torsion fingerprint for a molecule as a bit
235//! vector
236/*!
237 \param mol: the molecule to be fingerprinted
238 \param nBits: number of bits to include in the fingerprint
239 \param targetSize: the number of atoms to include in the "torsions"
240 \param fromAtoms: if provided, only torsions that start or end at
241 the specified atoms will be included in the
242 fingerprint
243 \param ignoreAtoms: if provided, any torsions that include
244 the specified atoms will not be included in the
245 fingerprint
246 \param atomInvariants: a list of invariants to use for the atom hashes
247 note: only the first \c codeSize bits of each
248 invariant are used.
249 \param nBitsPerEntry: number of bits to use in simulating counts
250 \param includeChirality: if set, chirality will be used in the atom invariants
251 (note: this is ignored if atomInvariants are
252 provided)
253
254 \return a pointer to the fingerprint. The client is
255 responsible for calling delete on this.
256
257*/
260 const ROMol &mol, unsigned int nBits = 2048, unsigned int targetSize = 4,
261 const std::vector<std::uint32_t> *fromAtoms = nullptr,
262 const std::vector<std::uint32_t> *ignoreAtoms = nullptr,
263 const std::vector<std::uint32_t> *atomInvariants = nullptr,
264 unsigned int nBitsPerEntry = 4, bool includeChirality = false);
265} // namespace AtomPairs
266} // namespace RDKit
267
268#endif
Pulls in all the BitVect classes.
a class for bit vectors that are densely occupied
a class for efficiently storing sparse vectors of ints
#define RDKIT_FINGERPRINTS_EXPORT
Definition export.h:177
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< boost::int64_t > * getTopologicalTorsionFingerprint(const ROMol &mol, unsigned int targetSize=4, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false)
returns the topological-torsion fingerprint for a molecule
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * getHashedAtomPairFingerprint(const ROMol &mol, unsigned int nBits=2048, unsigned int minLength=1, unsigned int maxLength=maxPathLen - 1, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false, bool use2D=true, int confId=-1)
returns the hashed atom-pair fingerprint for a molecule
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< boost::int64_t > * getHashedTopologicalTorsionFingerprint(const ROMol &mol, unsigned int nBits=2048, unsigned int targetSize=4, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false)
returns a hashed topological-torsion fingerprint for a molecule
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * getHashedTopologicalTorsionFingerprintAsBitVect(const ROMol &mol, unsigned int nBits=2048, unsigned int targetSize=4, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, unsigned int nBitsPerEntry=4, bool includeChirality=false)
const std::string atomPairsVersion
Definition AtomPairs.h:44
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * getHashedAtomPairFingerprintAsBitVect(const ROMol &mol, unsigned int nBits=2048, unsigned int minLength=1, unsigned int maxLength=maxPathLen - 1, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, unsigned int nBitsPerEntry=4, bool includeChirality=false, bool use2D=true, int confId=-1)
returns the hashed atom-pair fingerprint for a molecule as a bit vector
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * getAtomPairFingerprint(const ROMol &mol, unsigned int minLength, unsigned int maxLength, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false, bool use2D=true, int confId=-1)
returns the atom-pair fingerprint for a molecule
const unsigned int maxPathLen
Std stuff.
bool rdvalue_is(const RDValue_cast_t)