1
2
3
4
5
6
7
8
9
10
11 """ contains factory class for producing signatures
12
13
14 """
15 from __future__ import print_function, division
16 from rdkit.DataStructs import SparseBitVect,IntSparseIntVect,LongSparseIntVect
17 from rdkit.Chem.Pharm2D import Utils
18 import copy
19 import numpy
20
# module-level debugging switch: when true, the bit-index lookups in this
# module print their intermediate values
_verbose = False
22
23
25 """
26
SigFactory objects are used by creating one, setting the relevant
parameters, then calling the GetSignature() method each time a
signature is required.
30
31 """
32 - def __init__(self,featFactory,useCounts=False,minPointCount=2,maxPointCount=3,
33 shortestPathsOnly=True,includeBondOrder=False,skipFeats=None,
34 trianglePruneBins=True):
35 self.featFactory = featFactory
36 self.useCounts=useCounts
37 self.minPointCount=minPointCount
38 self.maxPointCount=maxPointCount
39 self.shortestPathsOnly=shortestPathsOnly
40 self.includeBondOrder=includeBondOrder
41 self.trianglePruneBins=trianglePruneBins
42 if skipFeats is None:
43 self.skipFeats=[]
44 else:
45 self.skipFeats=skipFeats
46 self._bins = None
47 self.sigKlass=None
48
49
def SetBins(self, bins):
    """ bins should be a list of 2-tuples

    A shallow copy of the list is stored and Init() is then called so
    that the internal parameters match the new bins.

    """
    binsCopy = copy.copy(bins)
    self._bins = binsCopy
    self.Init()
54
def GetNumBins(self):
    """ returns the number of distance bins currently set """
    nBins = len(self._bins)
    return nBins
59
def GetSignature(self):
    """ returns a new instance of the signature class, constructed with
    the current signature size

    """
    klass = self.sigKlass
    return klass(self._sigSize)
62
def _GetBitSummaryData(self, bitIdx):
    """ collects the data used to describe a bit

    **Arguments**

      - bitIdx: an integer bit index

    **Returns**

      a 5-tuple:

        1) the number of points in the pharmacophore

        2) the proto-pharmacophore (tuple of pattern indices)

        3) the scaffold (tuple of distance indices)

        4) the feature-family names for the entries of the
           proto-pharmacophore

        5) a symmetric nPts x nPts integer matrix holding, for each
           pair of points, the matching scaffold entry

    """
    nPts, combo, scaffold = self.GetBitInfo(bitIdx)
    fams = self.GetFeatFamilies()
    labels = [fams[x] for x in combo]
    # numpy.int was only an alias for the builtin int and has been removed
    # from NumPy, so the builtin is used directly
    dMat = numpy.zeros((nPts, nPts), int)
    # nPointDistDict gives the (i, j) point pair for each scaffold position
    for idx, (i, j) in enumerate(Utils.nPointDistDict[nPts]):
        dMat[i, j] = scaffold[idx]
        dMat[j, i] = scaffold[idx]

    return nPts, combo, scaffold, labels, dMat
75
def GetBitDescriptionAsText(self, bitIdx, includeBins=0, fullPage=1):
    """ placeholder for an HTML description of the bit

    NOTE: the HTML generation is not implemented. The summary data for
    the bit is looked up (so an invalid index still raises), but
    nothing is built from it and None is returned. Use
    GetBitDescription() for a plain-text description.

    **Arguments**

      - bitIdx: an integer bit index

      - includeBins: (optional) currently ignored; intended to include
        information about the bins when nonzero

      - fullPage: (optional) currently ignored; intended to include
        html headers and footers when nonzero

    **Returns**

      None

    """
    # the result is deliberately discarded: there is no formatting code yet
    self._GetBitSummaryData(bitIdx)
    return None
95
def GetBitDescription(self, bitIdx):
    """ returns a text description of the bit

    **Arguments**

      - bitIdx: an integer bit index

    **Returns**

      a string: the feature-family labels separated by spaces, then
      each row of the distance matrix introduced by a "|", with a
      closing "|" at the end

    """
    summary = self._GetBitSummaryData(bitIdx)
    labels, dMat = summary[3], summary[4]
    rowTexts = ["|" + " ".join([str(x) for x in row]) for row in dMat]
    return " ".join(labels) + " " + "".join(rowTexts) + "|"
114
116 """ OBSOLETE: this has been rewritten in C++
117 Internal use only
118 Returns the index of a bin defined by a set of distances.
119
120 **Arguments**
121
122 - dists: a sequence of distances (not binned)
123
124 - bins: a sorted sequence of distance bins (2-tuples)
125
126 - scaffolds: a list of possible scaffolds (bin combinations)
127
128 **Returns**
129
130 an integer bin index
131
132 **Note**
133
134 the value returned here is not an index in the overall
135 signature. It is, rather, an offset of a scaffold in the
136 possible combinations of distance bins for a given
137 proto-pharmacophore.
138
139 """
140 nBins = len(bins)
141 nDists = len(dists)
142 whichBins = [0]*nDists
143
144
145
146
147
148
149 for i in range(nDists):
150 dist = dists[i]
151 where = -1
152
153
154 startP,endP = 0,len(bins)
155 while startP<endP:
156 midP = (startP+endP) // 2
157 begBin,endBin = bins[midP]
158 if dist < begBin:
159 endP = midP
160 elif dist >= endBin:
161 startP = midP+1
162 else:
163 where = midP
164 break
165 if where < 0:
166 return None
167 whichBins[i] = where
168 res = scaffolds.index(tuple(whichBins))
169 if _verbose:
170 print('----- _fBI -----------')
171 print(' scaffolds:',scaffolds)
172 print(' bins:',whichBins)
173 print(' res:',res)
174 return res
175
def GetFeatFamilies(self):
    """ returns a sorted list of the feature factory's family names,
    leaving out every family listed in skipFeats

    """
    skipped = self.skipFeats
    return sorted(fam for fam in self.featFactory.GetFeatureFamilies()
                  if fam not in skipped)
180
def GetMolFeats(self, mol):
    """ returns the feature matches found in a molecule

    **Arguments**

      - mol: the molecule handed to the feature factory

    **Returns**

      a list with one entry per family returned by GetFeatFamilies()
      (in that order); each entry is a list holding the atom ids of
      every feature of that family

    """
    families = self.GetFeatFamilies()
    factory = self.featFactory
    matchesByFamily = {
        fam: [feat.GetAtomIds() for feat in factory.GetFeaturesForMol(mol, includeOnly=fam)]
        for fam in families
    }
    return [matchesByFamily[fam] for fam in families]
190
def GetBitIdx(self, featIndices, dists, sortIndices=True):
    """ returns the index for a pharmacophore described using a set of
    feature indices and distances

    **Arguments**

      - featIndices: a sequence of feature indices

      - dists: a sequence of distance between the features, only the
        unique distances should be included, and they should be in the
        order defined in Utils.

      - sortIndices : sort the indices

    **Returns**

      the integer bit index

    **Raises**

      - NotImplementedError: more than 3 points were provided

      - IndexError: the number of points is outside
        [minPointCount, maxPointCount], a feature index is out of
        range, or the distances do not map onto a known bin
        combination

    """
    nPoints = len(featIndices)
    if nPoints > 3:
        raise NotImplementedError('>3 points not supported')
    if nPoints < self.minPointCount:
        raise IndexError('bad number of points')
    if nPoints > self.maxPointCount:
        raise IndexError('bad number of points')

    # this is the start of the nPoint-point pharmacophores
    startIdx = self._starts[nPoints]

    if sortIndices:
        featIndices = sorted(featIndices)

    # check every index, not just the first one: with sortIndices=False the
    # smallest value is not guaranteed to come first
    if not nPoints or min(featIndices) < 0:
        raise IndexError('bad feature index')
    if max(featIndices) >= self._nFeats:
        raise IndexError('bad feature index')

    if nPoints == 3:
        featIndices, dists = Utils.OrderTriangle(featIndices, dists)

    # offset of this feature combination among all combinations of nPoints
    # features, scaled by the number of scaffolds each combination owns
    offset = Utils.CountUpTo(self._nFeats, nPoints, featIndices)
    if _verbose: print('offset for feature %s: %d'%(str(featIndices),offset))
    scaffolds = self._scaffolds[len(dists)]
    offset *= len(scaffolds)

    if _verbose:
        print('>>>>>>>>>>>>>>>>>>>>>>>')
        print('\tScaffolds:',repr(scaffolds),type(scaffolds))
        print('\tDists:',repr(dists),type(dists))
        print('\tbins:',repr(self._bins),type(self._bins))
    try:
        binIdx = self._findBinIdx(dists, self._bins, scaffolds)
    except ValueError:
        # the bin combination is not a valid scaffold
        binIdx = None
    if binIdx is None:
        # also reached when a distance lies outside every bin: the
        # pure-Python lookup reports that by returning None
        fams = self.GetFeatFamilies()
        fams = [fams[x] for x in featIndices]
        raise IndexError('distance bin not found: feats: %s; dists=%s; bins=%s; scaffolds: %s'%(fams,dists,self._bins,self._scaffolds))

    return startIdx + offset + binIdx
252
def GetBitInfo(self, idx):
    """ returns information about the given bit

    **Arguments**

      - idx: the bit index to be considered

    **Returns**

      a 3-tuple:

        1) the number of points in the pharmacophore

        2) the proto-pharmacophore (tuple of pattern indices)

        3) the scaffold (tuple of distance indices)

    **Raises**

      IndexError if idx is negative or not smaller than the signature
      size

    """
    # a negative index would otherwise wrap around silently through
    # Python's negative sequence indexing and describe an unrelated bit
    if idx < 0 or idx >= self._sigSize:
        raise IndexError('bad index (%d) queried. %d is the max'%(idx,self._sigSize))

    # first figure out how many points are in the pharmacophore: take the
    # largest point count whose block of bits starts at or before idx
    nPts = self.minPointCount
    while nPts < self.maxPointCount and self._starts[nPts+1] <= idx:
        nPts += 1

    # how far are we in from the start of this point count's block?
    offsetFromStart = idx - self._starts[nPts]
    if _verbose:
        print('\t %d Points, %d offset'%(nPts,offsetFromStart))

    # lookup the number of scaffolds
    nDists = len(Utils.nPointDistDict[nPts])
    scaffolds = self._scaffolds[nDists]
    nScaffolds = len(scaffolds)

    # each proto-pharmacophore owns nScaffolds consecutive bits
    protoIdx = offsetFromStart // nScaffolds
    indexCombos = Utils.GetIndexCombinations(self._nFeats, nPts)
    combo = tuple(indexCombos[protoIdx])
    if _verbose:
        print('\t combo: %s'%(str(combo)))

    # and the position inside those bits selects the scaffold
    scaffoldIdx = offsetFromStart % nScaffolds
    scaffold = scaffolds[scaffoldIdx]
    if _verbose:
        print('\t scaffold: %s'%(str(scaffold)))
    return nPts, combo, scaffold
302
def Init(self):
    """ Initializes internal parameters.  This **must** be called after
    making any changes to the signature parameters

    Recomputes the number of usable feature families, the scaffolds
    available for each point count, the starting bit of each point
    count, the overall signature size and the signature class.

    """
    # the scaffold table is indexed by number of distances; it is sized from
    # the entry for one point more than the maximum (presumably so that the
    # distance count of maxPointCount is always a valid index)
    self._scaffolds = [0] * len(Utils.nPointDistDict[self.maxPointCount + 1])
    self._starts = {}

    families = self.featFactory.GetFeatureFamilies()
    if self.skipFeats:
        self._nFeats = sum(1 for fam in families if fam not in self.skipFeats)
    else:
        self._nFeats = len(families)

    total = 0
    for nPts in range(self.minPointCount, self.maxPointCount + 1):
        # bits for nPts-point pharmacophores begin where the previous ones end
        self._starts[nPts] = total
        nDistsHere = len(Utils.nPointDistDict[nPts])
        scaffoldsHere = Utils.GetPossibleScaffolds(nPts, self._bins,
                                                   useTriangleInequality=self.trianglePruneBins)
        self._scaffolds[nDistsHere] = scaffoldsHere
        # every feature combination gets one bit per scaffold
        total += Utils.NumCombinations(self._nFeats, nPts) * len(scaffoldsHere)
    self._sigSize = total

    if self.useCounts:
        # count vectors: pick the index width from the signature size
        if self._sigSize < 2**31:
            self.sigKlass = IntSparseIntVect
        else:
            self.sigKlass = LongSparseIntVect
    else:
        self.sigKlass = SparseBitVect
334
# Swap in the C++ implementation of the bin lookup when the optional
# extension module can be imported; if the import fails, SigFactory keeps
# its pure-Python _findBinIdx.
try:
    from rdkit.Chem.Pharmacophores import cUtils
except ImportError:
    pass
else:
    SigFactory._findBinIdx = cUtils.FindBinIdx
343