 |
RDKit
Open-source cheminformatics and machine learning.
|
Go to the documentation of this file.
24 namespace MHFPFingerprints {
28 const uint32_t
prime = 0x01000193;
29 const uint32_t
seed = 0x811C9DC5;
34 const unsigned char* ptr = (
const unsigned char*)str.c_str();
35 size_t len = str.length();
60 unsigned int seed = 42);
115 std::vector<std::string>
117 unsigned char radius = 3,
119 bool isomeric =
false,
120 bool kekulize =
true,
121 unsigned char min_radius = 1);
124 std::vector<std::string>
126 unsigned char radius = 3,
128 bool isomeric =
false,
129 bool kekulize =
true,
130 unsigned char min_radius = 1);
155 std::vector<uint32_t>
157 unsigned char radius = 3,
159 bool isomeric =
false,
160 bool kekulize =
true,
161 unsigned char min_radius = 1);
164 std::vector<std::vector<uint32_t>>
166 unsigned char radius = 3,
168 bool isomeric =
false,
169 bool kekulize =
true,
170 unsigned char min_radius = 1);
173 std::vector<uint32_t>
175 unsigned char radius = 3,
177 bool isomeric =
false,
178 bool kekulize =
true,
179 unsigned char min_radius = 1);
182 std::vector<std::vector<uint32_t>>
184 unsigned char radius = 3,
186 bool isomeric =
false,
187 bool kekulize =
true,
188 unsigned char min_radius = 1);
217 unsigned char radius = 3,
219 bool isomeric =
false,
220 bool kekulize =
true,
221 unsigned char min_radius = 1,
222 size_t length = 2048);
225 std::vector<ExplicitBitVect>
227 unsigned char radius = 3,
229 bool isomeric =
false,
230 bool kekulize =
true,
231 unsigned char min_radius = 1,
232 size_t length = 2048);
237 unsigned char radius = 3,
239 bool isomeric =
false,
240 bool kekulize =
true,
241 unsigned char min_radius = 1,
242 size_t length = 2048);
245 std::vector<ExplicitBitVect>
247 unsigned char radius = 3,
249 bool isomeric =
false,
250 bool kekulize =
true,
251 unsigned char min_radius = 1,
252 size_t length = 2048);
264 const std::vector<uint32_t>& b) {
267 for (
size_t i = 0; i < a.size(); i++)
271 return matches / (double)a.size();
277 FastMod(
const uint64_t input,
const uint64_t ceil) {
278 return input >= ceil ? input % ceil : input;
282 Fold(
const std::vector<uint32_t>& vec, uint32_t length = 2048) {
284 for (
size_t i = 0; i < vec.size(); i++)
285 ebv.setBit(vec[i] % length);
289 std::vector<uint32_t>
290 HashShingling(std::vector<std::string> vec) {
291 std::vector<uint32_t> result(vec.size());
292 for (
size_t i = 0; i < vec.size(); i++)
297 unsigned int n_permutations_, seed_;
298 uint64_t prime_ = 2305843009213693951UL;
299 uint32_t max_hash_ = 4294967295;
300 std::vector<uint32_t> perms_a_;
301 std::vector<uint32_t> perms_b_;
ExplicitBitVect EncodeSECFP(std::string &smiles, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=true, unsigned char min_radius=1, size_t length=2048)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
std::vector< std::string > CreateShingling(const ROMol &mol, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=true, unsigned char min_radius=1)
Creates a molecular shingling based on circular substructures.
Defines the primary molecule class ROMol as well as associated typedefs.
std::vector< std::vector< uint32_t > > Encode(std::vector< std::string > &smiles, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=true, unsigned char min_radius=1)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
static double Distance(const std::vector< uint32_t > &a, const std::vector< uint32_t > &b)
Calculates the Jaccard / Tanimoto distance between two MHFP fingerprints.
std::vector< uint32_t > Encode(std::string &smiles, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=true, unsigned char min_radius=1)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
std::vector< ExplicitBitVect > EncodeSECFP(std::vector< std::string > &smiles, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=true, unsigned char min_radius=1, size_t length=2048)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
MHFPEncoder(unsigned int n_permutations=2048, unsigned int seed=42)
Constructor.
std::vector< uint32_t > FromStringArray(const std::vector< std::string > &vec)
Creates a MinHash from a vector of strings.
std::vector< uint32_t > Encode(ROMol &mol, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=true, unsigned char min_radius=1)
Creates a MinHash vector from a molecule.
std::vector< std::vector< uint32_t > > Encode(std::vector< ROMol > &mols, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=true, unsigned char min_radius=1)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
ExplicitBitVect EncodeSECFP(ROMol &mol, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=true, unsigned char min_radius=1, size_t length=2048)
Creates a binary fingerprint based on circular sub-SMILES.
A class for bit vectors that are sparsely occupied.
uint32_t hash(const std::string &str, uint32_t hash=seed)
A simple implementation of the Fowler–Noll–Vo hash function.
#define RDKIT_FINGERPRINTS_EXPORT
std::vector< std::string > CreateShingling(const std::string &smiles, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=true, unsigned char min_radius=1)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
std::vector< uint32_t > FromArray(const std::vector< uint32_t > &vec)
Creates a MinHash from a list of unsigned integers.
const std::string mhfpFingerprintVersion
A class for bit vectors that are densely occupied.
std::vector< ExplicitBitVect > EncodeSECFP(std::vector< ROMol > &mols, unsigned char radius=3, bool rings=true, bool isomeric=false, bool kekulize=true, unsigned char min_radius=1, size_t length=2048)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.