![]() |
RDKit
Open-source cheminformatics and machine learning.
|
Contains general bit-comparison and similarity operations. More...
Go to the source code of this file.
Functions | |
template<typename T > | |
double | SimilarityWrapper (const T &bv1, const T &bv2, double(*metric)(const T &, const T &), bool returnDistance=false) |
template<typename T > | |
double | SimilarityWrapper (const T &bv1, const T &bv2, double a, double b, double(*metric)(const T &, const T &, double, double), bool returnDistance=false) |
bool | AllProbeBitsMatch (const char *probe, const char *ref) |
bool | AllProbeBitsMatch (const std::string &probe, const std::string &ref) |
bool | AllProbeBitsMatch (const ExplicitBitVect &probe, const ExplicitBitVect &ref) |
template<typename T1 > | |
bool | AllProbeBitsMatch (const T1 &probe, const std::string &pkl) |
template<typename T1 > | |
bool | AllProbeBitsMatch (const T1 &probe, const T1 &ref) |
template<typename T1 , typename T2 > | |
int | NumOnBitsInCommon (const T1 &bv1, const T2 &bv2) |
returns the number of on bits in common between two bit vectors More... | |
int | NumOnBitsInCommon (const ExplicitBitVect &bv1, const ExplicitBitVect &bv2) |
template<typename T1 , typename T2 > | |
double | TanimotoSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Tanimoto similarity between two bit vects More... | |
template<typename T1 , typename T2 > | |
double | CosineSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Cosine similarity between two bit vects More... | |
template<typename T1 , typename T2 > | |
double | KulczynskiSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Kulczynski similarity between two bit vects More... | |
template<typename T1 , typename T2 > | |
double | DiceSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Dice similarity between two bit vects More... | |
template<typename T1 , typename T2 > | |
double | TverskySimilarity (const T1 &bv1, const T2 &bv2, double a, double b) |
returns the Tversky similarity between two bit vects More... | |
template<typename T1 , typename T2 > | |
double | SokalSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Sokal similarity between two bit vects More... | |
template<typename T1 , typename T2 > | |
double | McConnaugheySimilarity (const T1 &bv1, const T2 &bv2) |
returns the McConnaughey similarity between two bit vects More... | |
template<typename T1 , typename T2 > | |
double | AsymmetricSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Asymmetric similarity between two bit vects More... | |
template<typename T1 , typename T2 > | |
double | BraunBlanquetSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Braun-Blanquet similarity between two bit vects More... | |
template<typename T1 , typename T2 > | |
double | RusselSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Russel similarity between two bit vects More... | |
template<typename T1 , typename T2 > | |
double | RogotGoldbergSimilarity (const T1 &bv1, const T2 &bv2) |
returns the Rogot-Goldberg similarity between two bit vects More... | |
template<typename T1 , typename T2 > | |
double | OnBitSimilarity (const T1 &bv1, const T2 &bv2) |
returns the on bit similarity between two bit vects More... | |
template<typename T1 , typename T2 > | |
int | NumBitsInCommon (const T1 &bv1, const T2 &bv2) |
returns the number of common bits (on and off) between two bit vects More... | |
int | NumBitsInCommon (const ExplicitBitVect &bv1, const ExplicitBitVect &bv2) |
template<typename T1 , typename T2 > | |
double | AllBitSimilarity (const T1 &bv1, const T2 &bv2) |
template<typename T1 , typename T2 > | |
IntVect | OnBitsInCommon (const T1 &bv1, const T2 &bv2) |
returns an IntVect with indices of all on bits in common between two bit vects More... | |
template<typename T1 , typename T2 > | |
IntVect | OffBitsInCommon (const T1 &bv1, const T2 &bv2) |
returns an IntVect with indices of all off bits in common between two bit vects More... | |
template<typename T1 , typename T2 > | |
DoubleVect | OnBitProjSimilarity (const T1 &bv1, const T2 &bv2) |
returns the on-bit projected similarities between two bit vects More... | |
template<typename T1 , typename T2 > | |
DoubleVect | OffBitProjSimilarity (const T1 &bv1, const T2 &bv2) |
returns the off-bit projected similarities between two bit vects More... |
template<typename T1 > | |
T1 * | FoldFingerprint (const T1 &bv1, unsigned int factor=2) |
folds a bit vector factor times and returns the result More... | |
template<typename T1 > | |
std::string | BitVectToText (const T1 &bv1) |
returns a text representation of a bit vector (a string of 0s and 1s) More... | |
template<typename T1 > | |
std::string | BitVectToFPSText (const T1 &bv1) |
returns a hex representation of a bit vector compatible with Andrew Dalke's FPS format More... | |
template<typename T1 > | |
std::string | BitVectToBinaryText (const T1 &bv1) |
returns a binary string representation of a bit vector (an array of bytes) More... | |
template<typename T1 > | |
void | UpdateBitVectFromFPSText (T1 &bv1, const std::string &fps) |
updates a bit vector from Andrew Dalke's FPS format More... | |
template<typename T1 > | |
void | UpdateBitVectFromBinaryText (T1 &bv1, const std::string &fps) |
updates a bit vector from a binary string representation of a bit vector (an array of bytes) More... | |
Contains general bit-comparison and similarity operations.
The notation used to document the similarity metrics is:
- V1_n: number of bits in vector 1
- V1_o: number of on bits in vector 1
- (V1&V2)_o: number of on bits in the intersection of vectors 1 and 2

Definition in file BitOps.h.
double AllBitSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the common-bit similarity (on and off) between two bit vects. This is also called Manhattan similarity.
[bv1_n - (bv1^bv2)_o] / bv1_n
Referenced by SimilarityWrapper().
bool AllProbeBitsMatch | ( | const char * | probe, |
const char * | ref | ||
) |
Referenced by SimilarityWrapper().
bool AllProbeBitsMatch | ( | const std::string & | probe, |
const std::string & | ref | ||
) |
bool AllProbeBitsMatch | ( | const ExplicitBitVect & | probe, |
const ExplicitBitVect & | ref | ||
) |
bool AllProbeBitsMatch | ( | const T1 & | probe, |
const std::string & | pkl | ||
) |
bool AllProbeBitsMatch | ( | const T1 & | probe, |
const T1 & | ref | ||
) |
double AsymmetricSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Asymmetric similarity between two bit vects
(bv1&bv2)_o / min(bv1_o,bv2_o)
Referenced by SimilarityWrapper().
std::string BitVectToBinaryText | ( | const T1 & | bv1 | ) |
returns a binary string representation of a bit vector (an array of bytes)
bv1 | the vector to use |
Referenced by SimilarityWrapper().
std::string BitVectToFPSText | ( | const T1 & | bv1 | ) |
returns a hex representation of a bit vector compatible with Andrew Dalke's FPS format
bv1 | the vector to use |
Referenced by SimilarityWrapper().
std::string BitVectToText | ( | const T1 & | bv1 | ) |
returns a text representation of a bit vector (a string of 0s and 1s)
bv1 | the vector to use |
Referenced by SimilarityWrapper().
double BraunBlanquetSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Braun-Blanquet similarity between two bit vects
(bv1&bv2)_o / max(bv1_o,bv2_o)
Referenced by SimilarityWrapper().
double CosineSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Cosine similarity between two bit vects
(bv1&bv2)_o / sqrt(bv1_o * bv2_o)
Referenced by SimilarityWrapper().
double DiceSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Dice similarity between two bit vects
2*(bv1&bv2)_o / [bv1_o + bv2_o]
Referenced by SimilarityWrapper().
T1* FoldFingerprint | ( | const T1 & | bv1, |
unsigned int | factor = 2 |
||
) |
folds a bit vector factor times and returns the result
bv1 | the vector to be folded |
factor | (optional) the number of times to fold it |
Returns a pointer to the folded bit vector, which is bv1_n/factor long.
Note: The caller is responsible for deleting the result.
Referenced by SimilarityWrapper().
double KulczynskiSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Kulczynski similarity between two bit vects
(bv1&bv2)_o * [bv1_o + bv2_o] / [2 * bv1_o * bv2_o]
Referenced by SimilarityWrapper().
double McConnaugheySimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the McConnaughey similarity between two bit vects
[(bv1&bv2)_o * (bv1_o + bv2_o) - (bv1_o * bv2_o)] / (bv1_o * bv2_o)
Referenced by SimilarityWrapper().
int NumBitsInCommon | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the number of common bits (on and off) between two bit vects
bv1_n - (bv1^bv2)_o
Referenced by SimilarityWrapper().
int NumBitsInCommon | ( | const ExplicitBitVect & | bv1, |
const ExplicitBitVect & | bv2 | ||
) |
int NumOnBitsInCommon | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the number of on bits in common between two bit vectors
Referenced by SimilarityWrapper().
int NumOnBitsInCommon | ( | const ExplicitBitVect & | bv1, |
const ExplicitBitVect & | bv2 | ||
) |
DoubleVect OffBitProjSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the off-bit projected similarities between two bit vects
[bv1_n - (bv1|bv2)_o] / [bv1_n - bv1_o]
[bv2_n - (bv1|bv2)_o] / [bv2_n - bv2_o]
Note: bv1_n = bv2_n
Referenced by SimilarityWrapper().
IntVect OffBitsInCommon | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns an IntVect with indices of all off bits in common between two bit vects
Referenced by SimilarityWrapper().
DoubleVect OnBitProjSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the on-bit projected similarities between two bit vects
(bv1&bv2)_o / bv1_o
(bv1&bv2)_o / bv2_o
Referenced by SimilarityWrapper().
double OnBitSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the on bit similarity between two bit vects
(bv1&bv2)_o / (bv1|bv2)_o
Referenced by SimilarityWrapper().
IntVect OnBitsInCommon | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns an IntVect with indices of all on bits in common between two bit vects
Referenced by SimilarityWrapper().
double RogotGoldbergSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Rogot-Goldberg similarity between two bit vects
(bv1&bv2)_o / (bv1_o + bv2_o) + (bv1_n - bv1_o - bv2_o + (bv1&bv2)_o) / (2*bv1_n - bv1_o - bv2_o)
Referenced by SimilarityWrapper().
double RusselSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Russel similarity between two bit vects
(bv1&bv2)_o / bv1_o
Note that this operation is non-commutative: RusselSimilarity(bv1,bv2) != RusselSimilarity(bv2,bv1)
Referenced by SimilarityWrapper().
double SimilarityWrapper | ( | const T & | bv1, |
const T & | bv2, | ||
double(*)(const T &, const T &) | metric, | ||
bool | returnDistance = false |
||
) |
general purpose wrapper for calculating the similarity between two bvs that may be of unequal size (will automatically fold as appropriate)
Definition at line 30 of file BitOps.h.
References FoldFingerprint().
Referenced by RDDataManip::TanimotoDistanceMetric(), and RDDataManip::TanimotoSimilarityMetric().
double SimilarityWrapper | ( | const T & | bv1, |
const T & | bv2, | ||
double | a, | ||
double | b, | ||
double(*)(const T &, const T &, double, double) | metric, | ||
bool | returnDistance = false |
||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition at line 50 of file BitOps.h.
References AllBitSimilarity(), AllProbeBitsMatch(), AsymmetricSimilarity(), BitVectToBinaryText(), BitVectToFPSText(), BitVectToText(), BraunBlanquetSimilarity(), CosineSimilarity(), DiceSimilarity(), FoldFingerprint(), KulczynskiSimilarity(), McConnaugheySimilarity(), NumBitsInCommon(), NumOnBitsInCommon(), OffBitProjSimilarity(), OffBitsInCommon(), OnBitProjSimilarity(), OnBitSimilarity(), OnBitsInCommon(), RogotGoldbergSimilarity(), RusselSimilarity(), SokalSimilarity(), TanimotoSimilarity(), TverskySimilarity(), UpdateBitVectFromBinaryText(), and UpdateBitVectFromFPSText().
double SokalSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Sokal similarity between two bit vects
(bv1&bv2)_o / [2*bv1_o + 2*bv2_o - 3*(bv1&bv2)_o]
Referenced by SimilarityWrapper().
double TanimotoSimilarity | ( | const T1 & | bv1, |
const T2 & | bv2 | ||
) |
returns the Tanimoto similarity between two bit vects
(bv1&bv2)_o / [bv1_o + bv2_o - (bv1&bv2)_o]
Referenced by SimilarityWrapper(), RDDataManip::TanimotoDistanceMetric(), and RDDataManip::TanimotoSimilarityMetric().
double TverskySimilarity | ( | const T1 & | bv1, |
const T2 & | bv2, | ||
double | a, | ||
double | b | ||
) |
returns the Tversky similarity between two bit vects
(bv1&bv2)_o / [a*bv1_o + b*bv2_o + (1 - a - b)*(bv1&bv2)_o]
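Notes:
- with a = b = 1 this reduces to the Tanimoto similarity: (bv1&bv2)_o / [bv1_o + bv2_o - (bv1&bv2)_o]
- with a = b = 1/2 this reduces to the Dice similarity: 2*(bv1&bv2)_o / [bv1_o + bv2_o]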
Referenced by SimilarityWrapper().
void UpdateBitVectFromBinaryText | ( | T1 & | bv1, |
const std::string & | fps | ||
) |
updates a bit vector from a binary string representation of a bit vector (an array of bytes)
bv1 | the vector to use |
fps | the binary string |
Referenced by SimilarityWrapper().
void UpdateBitVectFromFPSText | ( | T1 & | bv1, |
const std::string & | fps | ||
) |
updates a bit vector from Andrew Dalke's FPS format
bv1 | the vector to use |
fps | the FPS hex string |
Referenced by SimilarityWrapper().