1
2
3
4
5
6
7
8
9
10
11 from __future__ import print_function
12
# Import guard: this module is unfinished, so executing it (import or script
# run) fails here before anything below is defined.
# NOTE(review): because this statement precedes the string literal that
# follows, that string is never reached and is not the module docstring.
raise NotImplementedError('not finished yet')
""" lazy generator of 2D pharmacophore signature data

"""
17 import rdkit.Chem
18 from rdkit.Chem.Pharm2D import SigFactory,Matcher,Utils
19
class Generator(object):
    """ lazily evaluates individual bits of a 2D pharmacophore signature

      Bits are computed one at a time when they are queried (via GetBit()
      or indexing) instead of generating the full signature up front.

      Important attributes:

       - mol: the molecule whose signature is being worked with

       - sigFactory: the SigFactory object with signature parameters
         NOTE: no preprocessing is carried out for _sigFactory_.
               It *must* be pre-initialized.

       - sig: the signature object obtained from _sigFactory_; it is used
         for the size check and is handed to the matcher

       - dMat: the distance matrix used for matching

       - bits: dict mapping bit index -> 0/1 for bits already queried, or
         None if caching is disabled

       - pattMatches: list with one entry per (non-skipped) feature family,
         each entry being the list of atom-id tuples matching that family

    """

    def __init__(self, sigFactory, mol, dMat=None, bitCache=True):
        """ constructor

          **Arguments**

           - sigFactory: a signature factory, see class docs

           - mol: a molecule, see class docs

           - dMat: (optional) a distance matrix for the molecule.  If this
             is not provided, one will be calculated

           - bitCache: (optional) if nonzero, a local cache of which bits
             have been queried will be maintained.  Otherwise things must
             be recalculated each time a bit is queried.

          **Raises**

           - ValueError: if _sigFactory_ is not a SigFactory.SigFactory

        """
        if not isinstance(sigFactory, SigFactory.SigFactory):
            raise ValueError('bad factory')

        self.sigFactory = sigFactory
        self.mol = mol

        # GetBit() and __len__() read self.sig, which was never assigned
        # before.  This mirrors how the module's own __main__ section
        # obtains a signature from a factory.
        # NOTE(review): confirm against the installed Pharm2D API that
        # GetSignature() is the object Matcher.GetAtomsMatchingBit expects.
        self.sig = sigFactory.GetSignature()

        if dMat is None:
            useBO = sigFactory.includeBondOrder
            # the module does "import rdkit.Chem", which binds only the name
            # "rdkit"; a bare "Chem" is undefined unless run as a script
            dMat = rdkit.Chem.GetDistanceMatrix(mol, useBO)
        self.dMat = dMat

        self.bits = {} if bitCache else None

        featFactory = sigFactory.featFactory
        skipFeats = sigFactory.skipFeats
        featFamilies = [fam for fam in featFactory.GetFeatureFamilies()
                        if fam not in skipFeats]

        # collect the atom ids of every feature, grouped by family
        featMatches = dict((fam, []) for fam in featFamilies)
        for feat in featFactory.GetFeaturesForMol(mol):
            fam = feat.GetFamily()
            if fam in featMatches:
                featMatches[fam].append(feat.GetAtomIds())

        # Flatten to a list ordered like featFamilies.  The original code
        # discarded the dict and then referenced an undefined name.
        # NOTE(review): the family-ordered layout is assumed from the
        # [None]*nFeats list the original allocated -- confirm.
        self.pattMatches = [featMatches[fam] for fam in featFamilies]

    def GetBit(self, idx):
        """ returns 1 if the bit is set and 0 otherwise

          **Arguments**

           - idx: the index of the bit to query

          **Raises**

           - IndexError: if _idx_ is negative or not smaller than the
             signature size

        """
        if idx < 0 or idx >= self.sig.GetSize():
            raise IndexError('Index %d invalid' % (idx))
        # "in" instead of dict.has_key(), which does not exist on Python 3
        if self.bits is not None and idx in self.bits:
            return self.bits[idx]

        # justOne=1: a single match is enough to know the bit is set
        tmp = Matcher.GetAtomsMatchingBit(self.sig, idx, self.mol,
                                          dMat=self.dMat, justOne=1,
                                          matchingAtoms=self.pattMatches)
        res = 1 if tmp else 0

        if self.bits is not None:
            self.bits[idx] = res
        return res

    def __len__(self):
        """ allows class to support len()

          Returns the size reported by the signature.

        """
        return self.sig.GetSize()

    def __getitem__(self, itm):
        """ allows class to support random access.
          Calls self.GetBit()

        """
        return self.GetBit(itm)
113
114
115
116
if __name__ == '__main__':
    # Ad-hoc benchmark: query the same random set of bits for a batch of
    # molecules, first through the lazy Generator and then through fully
    # generated fingerprints, and print the wall-clock time of each.
    import time
    from rdkit import RDConfig, Chem
    from rdkit.Chem.Pharm2D import Gobbi_Pharm2D, Generate
    import random

    factory = Gobbi_Pharm2D.factory
    nToDo = 100
    # the context manager closes the file; the original leaked the handle
    with open(RDConfig.RDDataDir + "/NCI/first_5K.smi", 'r') as inF:
        inD = inF.readlines()[:nToDo]

    mols = []
    for line in inD:
        # str.strip() returns a new string; the original discarded the
        # result, leaving stray whitespace/newlines in the SMILES
        smi = line.split('\t')[0].strip()
        mol = Chem.MolFromSmiles(smi)
        # MolFromSmiles returns None for unparsable input; skip those
        # rather than crashing inside the timing loops
        if mol is not None:
            mols.append(mol)
    nMols = len(mols)

    sig = factory.GetSignature()

    nBits = 300
    random.seed(23)  # fixed seed so both timings query the same bits
    bits = [random.randint(0, sig.GetSize() - 1) for x in range(nBits)]

    print('Using the Lazy Generator')
    t1 = time.time()
    for i, mol in enumerate(mols):
        if not i % 10:
            print('done mol %d of %d' % (i, nMols))
        gen = Generator(factory, mol)
        for bit in bits:
            v = gen[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))

    print('Generating and checking signatures')
    t1 = time.time()
    for i, mol in enumerate(mols):
        if not i % 10:
            print('done mol %d of %d' % (i, nMols))
        # separate name so the signature used to size the bit list above
        # is not shadowed
        fp = Generate.Gen2DFingerprint(mol, factory)
        for bit in bits:
            v = fp[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))
158