1
2
3
4
5
6
7
8
9
10
11 """ Functionality for SATIS typing atoms
12
13 """
14 from __future__ import print_function
15 from rdkit import Chem
16 from rdkit.six.moves import xrange
17
18 _debug = 0
19
20
21
22
23
24 aldehydePatt = Chem.MolFromSmarts('[CD2]=[OD1]')
25 ketonePatt = Chem.MolFromSmarts('[CD3]=[OD1]')
26 amidePatt = Chem.MolFromSmarts('[CD3](=[OD1])-[#7]')
27 esterPatt = Chem.MolFromSmarts('C(=[OD1])-O-[#6]')
28 carboxylatePatt = Chem.MolFromSmarts('C(=[OD1])-[OX1]')
29 carboxylPatt = Chem.MolFromSmarts('C(=[OD1])-[OX2]')
30
31 specialCases = ((carboxylatePatt,97),
32 (esterPatt,96),
33 (carboxylPatt,98),
34 (amidePatt,95),
35 (ketonePatt,94),
36 (aldehydePatt,93))
37
38
40 """ returns SATIS codes for all atoms in a molecule
41
42 The SATIS definition used is from:
43 J. Chem. Inf. Comput. Sci. _39_ 751-757 (1999)
44
45 each SATIS code is a string consisting of _neighborsToInclude_ + 1
46 2 digit numbers
47
48 **Arguments**
49
50 - mol: a molecule
51
52 - neighborsToInclude (optional): the number of neighbors to include
53 in the SATIS codes
54
55 **Returns**
56
57 a list of strings nAtoms long
58
59 """
60 global specialCases
61
62 nAtoms = mol.GetNumAtoms()
63 atomicNums = [0]*nAtoms
64 atoms = mol.GetAtoms()
65 for i in xrange(nAtoms):
66 atomicNums[i] = atoms[i].GetAtomicNum()
67
68 nSpecialCases = len(specialCases)
69 specialCaseMatches = [None]*nSpecialCases
70 for i,(patt,idx) in enumerate(specialCases):
71 if mol.HasSubstructMatch(patt):
72 specialCaseMatches[i] = mol.GetSubstructMatches(patt)
73 else:
74 specialCaseMatches[i] = ()
75
76 codes = [None]*nAtoms
77 for i in range(nAtoms):
78 code = [99]*(neighborsToInclude+1)
79 atom = atoms[i]
80 atomIdx = atom.GetIdx()
81 code[0] = min(atom.GetAtomicNum(),99)
82 bonds = atom.GetBonds()
83 nBonds = len(bonds)
84 otherIndices = [-1]*nBonds
85 if _debug: print(code[0],end='')
86 for j in range(nBonds):
87 otherIndices[j] = bonds[j].GetOtherAtom(atom).GetIdx()
88 if _debug: print(otherIndices[j],end='')
89 if _debug: print()
90 otherNums = [atomicNums[x] for x in otherIndices] + \
91 [1]*atom.GetTotalNumHs()
92 otherNums.sort()
93
94 nOthers = len(otherNums)
95 if nOthers > neighborsToInclude:
96 otherNums.reverse()
97 otherNums = otherNums[:neighborsToInclude]
98 otherNums.reverse()
99 for j in range(neighborsToInclude):
100 code[j+1] = min(otherNums[j],99)
101 else:
102 for j in range(nOthers):
103 code[j+1] = min(otherNums[j],99)
104 if nOthers < neighborsToInclude and code[0] in [6,8]:
105 found = 0
106 for j in range(nSpecialCases):
107 for matchTuple in specialCaseMatches[j]:
108 if atomIdx in matchTuple:
109 code[-1] = specialCases[j][1]
110 found = 1
111 break
112 if found:
113 break
114
115 codes[i] = ''.join(['%02d'%(x) for x in code])
116 return codes
117
118 if __name__ == '__main__':
119 smis = ['CC(=O)NC','CP(F)(Cl)(Br)(O)',
120 'O=CC(=O)C','C(=O)OCC(=O)O','C(=O)[O-]']
121 for smi in smis:
122 print(smi)
123 m = Chem.MolFromSmiles(smi)
124 codes = SATISTypes(m)
125 print(codes)
126