1
2
3
4
5
6
7
8
9
10
11 """ class definitions for similarity screening
12
13 See _SimilarityScreener_ for overview of required API
14
15 """
16 from rdkit import DataStructs
17 from rdkit.DataStructs import TopNContainer
18 from rdkit import RDConfig
19 from rdkit import six
20
22 """ base class
23
24 important attributes:
25 probe: the probe fingerprint against which we screen.
26
27 metric: a function that takes two arguments and returns a similarity
28 measure between them
29
30 dataSource: the source pool from which to draw, needs to support
31 a next() method
32
33 fingerprinter: a function that takes a molecule and returns a
34 fingerprint of the appropriate format
35
36
37 **Notes**
38 subclasses must support either an iterator interface
39 or __len__ and __getitem__
40 """
def __init__(self, probe=None, metric=None, dataSource=None, fingerprinter=None):
  """ Store the screening configuration on the instance.

  The values are saved as-is; nothing is validated, copied or
  fingerprinted here.

  **Arguments**

    - probe: the probe fingerprint against which we screen

    - metric: a function that takes two arguments and returns a
      similarity measure between them

    - dataSource: the source pool from which candidates are drawn

    - fingerprinter: a function that takes a molecule and returns a
      fingerprint of the appropriate format

  """
  self.metric = metric
  self.dataSource = dataSource
  self.fingerprinter = fingerprinter
  self.probe = probe
46
48 """ used to reset screeners that behave as iterators
49 """
50 pass
51
52
54 """ sets our probe fingerprint """
55 self.probe = probeFingerprint
56
58 """ returns a fingerprint for a single probe object
59
60 This is potentially useful in initializing our internal
61 probe object.
62
63 """
64 return self.fingerprinter(probe)
65
67 """ Used to return all compounds that have a similarity
68 to the probe beyond a threshold value
69
70 **Notes**:
71
72 - This is as lazy as possible, so the data source isn't
73 queried until the client asks for a hit.
74
75 - In addition to being lazy, this class is as thin as possible.
76 (Who'd have thought it was possible!)
77 Hits are *not* stored locally, so if a client resets
78 the iteration and starts over, the same amount of work must
79 be done to retrieve the hits.
80
81 - The thinness and laziness force us to support only forward
82 iteration (not random access)
83
84 """
89
90
92 """ *Internal use only* """
93 done = 0
94 res = None
95 sim = 0
96 while not done:
97
98
99 obj = six.next(self.dataIter)
100 fp = self.fingerprinter(obj)
101 sim = DataStructs.FingerprintSimilarity(fp,self.probe,self.metric)
102 if sim >= self.threshold:
103 res = obj
104 done = 1
105 return sim,res
106
108 """ used to reset our internal state so that iteration
109 starts again from the beginning
110 """
111 self.dataSource.reset()
112 self.dataIter = iter(self.dataSource)
113
115 """ returns an iterator for this screener
116 """
117 self.Reset()
118 return self
119
121 """ required part of iterator interface """
122 return self._nextMatch()
123
124 __next__ = next
125
126
128 """ A screener that only returns the top N hits found
129
130 **Notes**
131
132 - supports forward iteration and getitem
133
134 """
140
148
150 if self._pos >= self.numToGet:
151 raise StopIteration
152 else:
153 res = self.topN[self._pos]
154 self._pos += 1
155 return res
156
157 __next__ = next
158
165
167 if self.topN is None:
168 self._initTopN()
169 return self.numToGet
170
175