1
2
3
4
5
6
7
8
9
10
11 """ contains SMARTS definitions and calculators for EState atom types
12
13 defined in: Hall and Kier JCICS _35_ 1039-1045 (1995) Table 1
14 """
15 from rdkit import Chem
16
# Table of (EState atom-type name, SMARTS) pairs, grouped by element.
#
# Reading the table:
#   - the name spells out the bonds of the typed atom (s = single, d = double,
#     t = triple, a = aromatic) followed by the element and its hydrogens
#   - the first atom of every SMARTS is the atom being typed; D<n> is its
#     explicit degree and H<n> its hydrogen count
#
# The order of the entries is significant: consumers that build one slot per
# entry rely on it, so new entries must not be inserted in the middle.
_rawD = [
    # lithium
    ('sLi', '[LiD1]-*'),

    # beryllium
    ('ssBe', '[BeD2](-*)-*'),
    ('ssssBe', '[BeD4](-*)(-*)(-*)-*'),

    # boron
    ('ssBH', '[BD2H](-*)-*'),
    ('sssB', '[BD3](-*)(-*)-*'),
    ('ssssB', '[BD4](-*)(-*)(-*)-*'),

    # carbon
    ('sCH3', '[CD1H3]-*'),
    ('dCH2', '[CD1H2]=*'),
    ('ssCH2', '[CD2H2](-*)-*'),
    ('tCH', '[CD1H]#*'),
    ('dsCH', '[CD2H](=*)-*'),
    ('aaCH', '[C,c;D2H](:*):*'),
    ('sssCH', '[CD3H](-*)(-*)-*'),
    ('ddC', '[CD2H0](=*)=*'),
    ('tsC', '[CD2H0](#*)-*'),
    ('dssC', '[CD3H0](=*)(-*)-*'),
    ('aasC', '[C,c;D3H0](:*)(:*)-*'),
    ('aaaC', '[C,c;D3H0](:*)(:*):*'),
    ('ssssC', '[CD4H0](-*)(-*)(-*)-*'),

    # nitrogen
    # NOTE(review): the aromatic rows here (and for O and S below) are written
    # '[N,nD2H]' without the ';' used by the carbon rows.  In SMARTS ',' binds
    # more loosely than the implicit AND, so this reads "N, or (n with D2 and
    # H)".  Left as-is; confirm whether '[N,n;D2H]' was intended.
    ('sNH3', '[ND1H3]-*'),
    ('sNH2', '[ND1H2]-*'),
    ('ssNH2', '[ND2H2](-*)-*'),
    ('dNH', '[ND1H]=*'),
    ('ssNH', '[ND2H](-*)-*'),
    ('aaNH', '[N,nD2H](:*):*'),
    ('tN', '[ND1H0]#*'),
    ('sssNH', '[ND3H](-*)(-*)-*'),
    ('dsN', '[ND2H0](=*)-*'),
    ('aaN', '[N,nD2H0](:*):*'),
    ('sssN', '[ND3H0](-*)(-*)-*'),
    # the two terminal oxygens are matched with any bond type (~) instead of
    # an explicit double bond
    ('ddsN', '[ND3H0](~[OD1H0])(~[OD1H0])-,:*'),
    ('aasN', '[N,nD3H0](:*)(:*)-,:*'),
    ('ssssN', '[ND4H0](-*)(-*)(-*)-*'),

    # oxygen
    ('sOH', '[OD1H]-*'),
    ('dO', '[OD1H0]=*'),
    ('ssO', '[OD2H0](-*)-*'),
    ('aaO', '[O,oD2H0](:*):*'),

    # fluorine
    ('sF', '[FD1]-*'),

    # silicon
    ('sSiH3', '[SiD1H3]-*'),
    ('ssSiH2', '[SiD2H2](-*)-*'),
    ('sssSiH', '[SiD3H1](-*)(-*)-*'),
    ('ssssSi', '[SiD4H0](-*)(-*)(-*)-*'),

    # phosphorus
    ('sPH2', '[PD1H2]-*'),
    ('ssPH', '[PD2H1](-*)-*'),
    ('sssP', '[PD3H0](-*)(-*)-*'),
    ('dsssP', '[PD4H0](=*)(-*)(-*)-*'),
    ('sssssP', '[PD5H0](-*)(-*)(-*)(-*)-*'),

    # sulfur
    ('sSH', '[SD1H1]-*'),
    ('dS', '[SD1H0]=*'),
    ('ssS', '[SD2H0](-*)-*'),
    ('aaS', '[S,sD2H0](:*):*'),
    ('dssS', '[SD3H0](=*)(-*)-*'),
    # as for ddsN, the terminal oxygens are matched with any bond type (~)
    ('ddssS', '[SD4H0](~[OD1H0])(~[OD1H0])(-*)-*'),

    # chlorine
    ('sCl', '[ClD1]-*'),

    # germanium
    ('sGeH3', '[GeD1H3](-*)'),
    ('ssGeH2', '[GeD2H2](-*)-*'),
    ('sssGeH', '[GeD3H1](-*)(-*)-*'),
    ('ssssGe', '[GeD4H0](-*)(-*)(-*)-*'),

    # arsenic
    ('sAsH2', '[AsD1H2]-*'),
    ('ssAsH', '[AsD2H1](-*)-*'),
    ('sssAs', '[AsD3H0](-*)(-*)-*'),
    ('sssdAs', '[AsD4H0](=*)(-*)(-*)-*'),
    ('sssssAs', '[AsD5H0](-*)(-*)(-*)(-*)-*'),

    # selenium
    # NOTE(review): unlike aaS, aaSe lists no aromatic alternative for the
    # typed atom -- confirm whether that is intended.
    ('sSeH', '[SeD1H1]-*'),
    ('dSe', '[SeD1H0]=*'),
    ('ssSe', '[SeD2H0](-*)-*'),
    ('aaSe', '[SeD2H0](:*):*'),
    ('dssSe', '[SeD3H0](=*)(-*)-*'),
    ('ddssSe', '[SeD4H0](=*)(=*)(-*)-*'),

    # bromine
    ('sBr', '[BrD1]-*'),

    # tin
    ('sSnH3', '[SnD1H3]-*'),
    ('ssSnH2', '[SnD2H2](-*)-*'),
    ('sssSnH', '[SnD3H1](-*)(-*)-*'),
    ('ssssSn', '[SnD4H0](-*)(-*)(-*)-*'),

    # iodine
    ('sI', '[ID1]-*'),

    # lead
    ('sPbH3', '[PbD1H3]-*'),
    ('ssPbH2', '[PbD2H2](-*)-*'),
    ('sssPbH', '[PbD3H1](-*)(-*)-*'),
    ('ssssPb', '[PbD4H0](-*)(-*)(-*)-*'),
]
115
# Cache of compiled patterns; stays None until BuildPatts() has been run.
esPatterns = None


def BuildPatts(rawV=None):
    """ Internal Use Only

    compiles a table of SMARTS definitions and stores the result in the
    module-level `esPatterns` list

    **Arguments:**

      - rawV: (optional) a sequence of (name, SMARTS) 2-tuples; if this is
        not provided the module-level table `_rawD` is used

    **Notes:**

      - `esPatterns` gets exactly one slot per entry of `rawV`, in the same
        order

      - a slot holds a (name, pattern) tuple, or None if the SMARTS could
        not be compiled; a warning is written to stderr for every pattern
        that is skipped

    """
    # imported here because the module header does not import sys
    import sys

    global esPatterns
    if rawV is None:
        rawV = _rawD

    # One slot per definition so positions stay aligned with rawV even when
    # some patterns are skipped.
    patterns = [None] * len(rawV)
    for i, (name, sma) in enumerate(rawV):
        try:
            patt = Chem.MolFromSmarts(sma)
        except Exception:
            # best effort: a pattern that cannot be built is reported and
            # skipped below rather than aborting the whole table
            patt = None
        # MolFromSmarts signals an unparseable SMARTS by returning None, so
        # that case is handled the same way as an exception.
        if patt is None:
            sys.stderr.write('WARNING: problems with pattern %s (name: %s), skipped.\n' % (sma, name))
        else:
            patterns[i] = name, patt

    # publish only once the table has been built completely
    esPatterns = patterns
134
# NOTE(review): the `def` line of this function was missing from the reviewed
# text; the name TypeAtoms and the single `mol` parameter were restored from
# the body and the upstream RDKit module -- confirm against the real file.
def TypeAtoms(mol):
    """ assigns each atom in a molecule to an EState type

    **Arguments:**

      - mol: the molecule to be typed; it has to provide GetNumAtoms() and
        GetSubstructMatches()

    **Returns:**

      list of tuples (atoms can possibly match multiple patterns) with atom types

      the list has one tuple per atom, indexed by atom index; the names in a
      tuple appear in pattern order without duplicates and atoms that match
      no pattern get an empty tuple

    """
    # compile the patterns on first use
    if esPatterns is None:
        BuildPatts()

    nAtoms = mol.GetNumAtoms()
    found = [[] for _ in range(nAtoms)]
    for entry in esPatterns:
        # patterns that could not be compiled are left as None (or carry a
        # None pattern); skip them instead of failing on the unpacking
        if entry is None:
            continue
        name, patt = entry
        if patt is None:
            continue
        for match in mol.GetSubstructMatches(patt, uniquify=0):
            # only the atom matched by the first atom of the pattern is typed
            names = found[match[0]]
            if name not in names:
                names.append(name)
    return [tuple(names) for names in found]
161