1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 import os,weakref,re
35 from rdkit.six.moves import cStringIO as StringIO
36 from rdkit import RDConfig
37
62
65
# Module-level cache for the most recently parsed functional-group hierarchy.
# groupDefns maps each group label to the node built for it.
groupDefns = {}
# hierarchy holds the list of parentless (top-level) nodes from the last
# build; lastData / lastFilename remember which input produced it so that a
# repeated request can be answered from the cache.
hierarchy = lastData = lastFilename = None
# NOTE(review): the ``def`` line for this function was not visible in the
# reviewed text; the name and defaults below are reconstructed from the
# parameters the body reads (fileNm, data, force) -- confirm against callers.
def BuildFuncGroupHierarchy(fileNm=None, data=None, force=False):
    """Parse functional-group definitions and return the top-level nodes.

    The input is read line by line.  Everything after ``//`` on a line is
    dropped, blank lines are skipped, and the remaining text is split on
    runs of tabs into: label, SMARTS, name and an optional reaction SMARTS.
    Labels are dot-separated; every ancestor of a label (``A`` and ``A.B``
    for ``A.B.C``) must already have been defined on an earlier line.

    Arguments:
      - fileNm: path of the definition file.  When neither ``fileNm`` nor
        ``data`` is given, ``Functional_Group_Hierarchy.txt`` in
        ``RDConfig.RDDataDir`` is used.
      - data: the definitions as a string; only used when ``fileNm`` is empty.
      - force: rebuild even if a cached hierarchy matches the request.

    Returns a list of the nodes that have no parent.  A copy of that list is
    cached in the module-level ``hierarchy`` and ``groupDefns`` is replaced
    with the label -> node mapping.

    Raises FuncGroupFileParseError for short lines, duplicate labels,
    missing ancestors and SMARTS that cannot be parsed.
    """
    global groupDefns, hierarchy, lastData, lastFilename
    if (not force and hierarchy
            and (not data or data == lastData)
            and (not fileNm or fileNm == lastFilename)):
        return hierarchy[:]

    # Kept as a function-level import, exactly where the original had it.
    from rdkit import Chem

    if not fileNm and not data:
        fileNm = os.path.join(RDConfig.RDDataDir, 'Functional_Group_Hierarchy.txt')

    if fileNm:
        # Read everything up front so the file handle is closed before parsing.
        with open(fileNm, 'r') as inF:
            lines = inF.readlines()
        sourceName = fileNm
    else:
        lines = StringIO(data).readlines()
        # No file backs this build; a later request for a specific file must
        # not be satisfied from this cache entry.
        sourceName = None

    splitter = re.compile('\t+')
    # Build into locals and publish to the module globals only on success, so
    # a parse error cannot leave the cache half-updated.
    defns = {}
    res = []
    for lineNo, line in enumerate(lines, 1):
        line = line.strip().split('//')[0]
        if not line:
            continue
        splitL = splitter.split(line)
        if len(splitL) < 3:
            raise FuncGroupFileParseError("Input line %d (%s) is not long enough."%(lineNo,repr(line)))
        label = splitL[0].strip()
        if label in defns:
            raise FuncGroupFileParseError("Duplicate label on line %d."%lineNo)

        labelHierarchy = label.split('.')
        parent = None
        if len(labelHierarchy) > 1:
            # Every proper prefix of the dotted label must already exist.
            for depth in range(1, len(labelHierarchy)):
                tmp = '.'.join(labelHierarchy[:depth])
                if tmp not in defns:
                    raise FuncGroupFileParseError("Hierarchy member %s (line %d) not found."%(tmp,lineNo))
            parent = defns['.'.join(labelHierarchy[:-1])]

        smarts = splitL[1]
        try:
            patt = Chem.MolFromSmarts(smarts)
        except Exception:
            # Report the underlying failure, then fall through to the
            # parse-error raise below.
            import traceback
            traceback.print_exc()
            patt = None
        if not patt:
            raise FuncGroupFileParseError('Smarts "%s" (line %d) could not be parsed.'%(smarts,lineNo))

        name = splitL[2].strip()
        rxnSmarts = splitL[3] if len(splitL) > 3 else ''

        node = FGHierarchyNode(name, patt, smarts=smarts, label=label,
                               parent=parent, rxnSmarts=rxnSmarts)
        if parent:
            parent.children.append(node)
        else:
            res.append(node)
        defns[label] = node

    groupDefns = defns
    hierarchy = res[:]
    lastData = data
    lastFilename = sourceName
    return res
138
140 ms = mol.GetSubstructMatches(node.pattern)
141 count = 0
142 seen = {}
143 for m in ms:
144 if m[0] not in seen:
145 count+=1
146 seen[m[0]] = 1
147 if count:
148 res[idx] = count
149 idx += 1
150 for child in node.children:
151 idx=_SetNodeBits(mol,child,res,idx)
152 else:
153 idx += len(node)
154 return idx
155
# NOTE(review): the ``def`` line was not visible in the reviewed text; the
# name and parameters are reconstructed from the names the body reads
# (mol, hierarchy) -- confirm against callers.
def CreateMolFingerprint(mol, hierarchy):
    """Return a list of per-node match counts for ``mol``.

    The result has one slot per unit of ``len(entry)`` summed over the
    entries of ``hierarchy``, initialised to zero.  Each entry is then handed
    to ``_SetNodeBits``, which fills in counts and returns the position at
    which the next entry starts.
    """
    size = sum(len(entry) for entry in hierarchy)
    res = [0] * size
    pos = 0
    for entry in hierarchy:
        pos = _SetNodeBits(mol, entry, res, pos)
    return res
165