1
2
3
4
5
6
7
8
9
10
11 import sys
12
13 from rdkit import Chem
14 from rdkit.Chem.rdfragcatalog import *
15
16
19
20
26
def ProcessGainsFile(fileName, nToDo=-1, delim=',', haveDescriptions=1):
    """Read a delimited gains file and return its rows as BitGainsInfo objects.

    The first line of the file is treated as a header and skipped.  Every
    following non-blank line is split on ``delim`` and interpreted as::

        id, [description,] gain, count, count, ...

    NOTE(review): the ``def`` line was missing from the reviewed text; the
    function name, parameter order and default values were reconstructed
    from the names the body uses -- confirm them against existing callers.

    Args:
        fileName: path of the file to read.
        nToDo: stop as soon as this many records have been collected.  A
            value the record count never equals (e.g. -1 or 0) reads the
            whole file.
        delim: column separator passed to ``str.split``.
        haveDescriptions: when true, the second column is stored as the
            record's ``description`` and the gain is read from the third
            column; otherwise the gain directly follows the id.

    Returns:
        A list of BitGainsInfo instances with ``id`` (int), ``gain`` (float)
        and ``nPerClass`` (list of int) set, plus ``description`` when
        ``haveDescriptions`` is true.

    Raises:
        ValueError: if a numeric column cannot be converted.
        IndexError: if a data line has too few columns.
    """
    res = []
    # The context manager closes the handle even when a row fails to parse.
    # Iterating the file object directly replaces xreadlines(), which only
    # exists on Python 2 file objects.
    with open(fileName, 'r') as inFile:
        next(inFile, None)  # discard the header line (no-op on an empty file)
        for line in inFile:
            # str.split() never returns an empty list, so a length check on
            # the split result cannot detect blank lines; test the raw line
            # instead so they are skipped rather than crashing int('').
            if not line.strip():
                continue
            splitL = [x.strip() for x in line.split(delim)]
            bit = BitGainsInfo()
            bit.id = int(splitL[0])
            col = 1
            if haveDescriptions:
                bit.description = splitL[col]
                col += 1
            bit.gain = float(splitL[col])
            col += 1
            # every remaining column is an integer count
            bit.nPerClass = [int(entry) for entry in splitL[col:]]
            res.append(bit)
            if len(res) == nToDo:
                break
    return res
51
def BuildAdjacencyList(catalog, bits, limitInclusion=1, orderLevels=0):
    """Build down-link adjacency lists and per-order groupings for some bits.

    For every bit in ``bits`` the catalog is asked for the entry behind the
    bit, that entry's order, and the entries reachable through its down
    links.  Each down link is translated back to a bit id.

    NOTE(review): the ``def`` line was missing from the reviewed text; the
    function name, parameter order and default values were reconstructed
    from the names the body uses -- confirm them against existing callers.

    Args:
        catalog: object providing ``GetBitEntryId``, ``GetEntryBitId``,
            ``GetEntryOrder`` and ``GetEntryDownIds``.
        bits: iterable of objects exposing an ``id`` attribute (bit id).
        limitInclusion: when true, a down-linked bit is only recorded if its
            id also appears in ``bits``; when false every down link is kept.
        orderLevels: when true, each level is converted from a set into a
            list sorted by number of recorded down links, most first.

    Returns:
        A tuple ``(adjs, levels)``:
          * ``adjs`` maps each bit id from ``bits`` to the list of bit ids
            recorded for its down links.
          * ``levels`` maps an entry order to the bit ids seen at that
            order: a ``set`` by default, a sorted ``list`` when
            ``orderLevels`` is true.
    """
    adjs = {}
    levels = {}
    bitIds = [bit.id for bit in bits]
    # Set copy so the membership test inside the nested loop is O(1).
    included = set(bitIds)
    for bitId in bitIds:
        entry = catalog.GetBitEntryId(bitId)
        levels.setdefault(catalog.GetEntryOrder(entry), set()).add(bitId)
        downBits = []
        for down in catalog.GetEntryDownIds(entry):
            downBit = catalog.GetEntryBitId(down)
            if not limitInclusion or downBit in included:
                downBits.append(downBit)
                levels.setdefault(catalog.GetEntryOrder(down), set()).add(downBit)
        adjs[bitId] = downBits
    if orderLevels:
        def _rank(candidate):
            # Bits that only showed up as a down link (possible when
            # limitInclusion is false) have no adjacency list of their own;
            # count them as having zero down links instead of raising.
            # The id is a tie-breaker so the output order is deterministic.
            return (len(adjs.get(candidate, ())), candidate)

        # Sets cannot be indexed, so sort the members directly rather than
        # going through an argsort of the counts.
        for order in list(levels):
            levels[order] = sorted(levels[order], key=_rank, reverse=True)
    return adjs, levels
83
def GetMolsMatchingBit(mols, bit, fps):
    """Return the molecules whose fingerprint has a particular bit set.

    NOTE(review): the ``def`` line was missing from the reviewed text; the
    function name and parameter order were reconstructed from the names the
    body uses -- confirm them against existing callers.

    Args:
        mols: sequence of molecules.
        bit: either a BitGainsInfo instance (its ``id`` is used) or a value
            used directly as the index into each fingerprint.
        fps: sequence indexed in parallel with ``mols``; ``fps[i]`` is the
            fingerprint looked up for ``mols[i]``.

    Returns:
        A list of the entries of ``mols``, in their original order, for
        which ``fps[i][bitId]`` is truthy.
    """
    bitId = bit.id if isinstance(bit, BitGainsInfo) else bit
    # Index fps by position (rather than zip) so that a fingerprint list
    # shorter than mols still raises IndexError.
    return [mol for idx, mol in enumerate(mols) if fps[idx][bitId]]
95