1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35 from rdkit import Chem
36 import os,re
37
38 from rdkit import RDConfig
39
class SaltRemover(object):
    """Remove salt / counter-ion fragments from molecules.

    Salt definitions are SMARTS patterns, one per line.  They are read from
    ``defnData`` when that is set (and non-empty), otherwise from the file
    named by ``defnFilename``.  On each line everything after ``//`` is
    ignored, and only the first whitespace-separated token is used as the
    pattern.
    """
    # Default definitions file: Salts.txt inside the RDKit data directory.
    defnFilename = os.path.join(RDConfig.RDDataDir, 'Salts.txt')
    # Optional in-memory definitions; used in preference to the file.
    defnData = None
    # List of salt query molecules, populated by the constructor.
    salts = None

    def __init__(self, defnFilename=None, defnData=None):
        """Build a remover from a definitions file or an in-memory string.

        Arguments:
          - defnFilename: path of a salt definitions file; when omitted the
            class-level default is kept.
          - defnData: salt definitions as text; when given (and non-empty)
            it is used instead of the file.

        Raises ValueError, carrying the offending line, if a definition
        cannot be converted into a query molecule.

        >>> remover = SaltRemover()
        >>> len(remover.salts)>0
        True

        >>> remover = SaltRemover(defnData="[Cl,Br]")
        >>> len(remover.salts)
        1

        """
        # Only override the class-level defaults when the caller actually
        # supplied something, so subclasses can still preset either value.
        if defnFilename:
            self.defnFilename = defnFilename
        if defnData is not None:
            self.defnData = defnData
        self._initPatterns()

    def _initPatterns(self):
        """Parse the configured salt definitions into ``self.salts``."""
        whitespace = re.compile(r'[\t ]+')
        if self.defnData:
            from rdkit.six.moves import cStringIO as StringIO
            inF = StringIO(self.defnData)
        else:
            inF = open(self.defnFilename, 'r')
        self.salts = []
        try:
            for line in inF:
                # Drop surrounding whitespace and any trailing // comment.
                line = line.strip().split('//')[0]
                if not line:
                    continue
                smarts = whitespace.split(line)[0]
                try:
                    salt = Chem.MolFromSmarts(smarts)
                except Exception:
                    import traceback
                    traceback.print_exc()
                    raise ValueError(line)
                # RDKit reports an unparsable SMARTS by returning None
                # rather than raising; treat that as the same error so a
                # None never ends up in the pattern list.
                if salt is None:
                    raise ValueError(line)
                self.salts.append(salt)
        finally:
            # Close the source even when a definition is rejected.
            inF.close()

    def StripMol(self, mol, dontRemoveEverything=False):
        """Return the molecule with fragments matching the salt patterns removed.

        Arguments:
          - mol: the molecule to strip.
          - dontRemoveEverything: when true, a deletion that would leave an
            empty molecule is not applied, and a molecule that consists of
            at most one fragment is returned untouched.

        The result is sanitized if anything was changed and atoms remain.

        >>> remover = SaltRemover(defnData="[Cl,Br]")
        >>> len(remover.salts)
        1

        >>> mol = Chem.MolFromSmiles('CN(C)C.Cl')
        >>> res = remover.StripMol(mol)
        >>> res is not None
        True
        >>> res.GetNumAtoms()
        4

        Notice that all salts are removed:
        >>> mol = Chem.MolFromSmiles('CN(C)C.Cl.Cl.Br')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        4

        Matching (e.g. "salt-like") atoms in the molecule are unchanged:
        >>> mol = Chem.MolFromSmiles('CN(Br)Cl')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        4

        >>> mol = Chem.MolFromSmiles('CN(Br)Cl.Cl')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        4

        Charged salts are handled reasonably:
        >>> mol = Chem.MolFromSmiles('C[NH+](C)(C).[Cl-]')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        4


        Watch out for this case (everything removed):
        >>> remover = SaltRemover()
        >>> len(remover.salts)>1
        True
        >>> mol = Chem.MolFromSmiles('CC(=O)O.[Na]')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        0

        dontRemoveEverything helps with this by leaving the last salt:
        >>> res = remover.StripMol(mol,dontRemoveEverything=True)
        >>> res.GetNumAtoms()
        4

        but in cases where the last salts are the same, it can't choose
        between them, so it returns all of them:
        >>> mol = Chem.MolFromSmiles('Cl.Cl')
        >>> res = remover.StripMol(mol,dontRemoveEverything=True)
        >>> res.GetNumAtoms()
        2

        """
        def _applyPattern(target, pattern, keepSomething):
            # Delete matches of one pattern repeatedly until the atom count
            # stops shrinking (or the molecule is empty).
            atomCount = target.GetNumAtoms()
            if atomCount == 0:
                return target

            # The third argument is onlyFrags in the RDKit API: only whole
            # fragments that match are deleted.
            candidate = Chem.DeleteSubstructs(target, pattern, True)
            if not candidate:
                return target
            if keepSomething and candidate.GetNumAtoms() == 0:
                # Applying this pattern would wipe the molecule; skip it.
                return target

            current = candidate
            while 0 < current.GetNumAtoms() < atomCount:
                atomCount = current.GetNumAtoms()
                candidate = Chem.DeleteSubstructs(current, pattern, True)
                if keepSomething and candidate.GetNumAtoms() == 0:
                    break
                current = candidate
            return current

        # A single-fragment molecule has nothing that may safely be removed.
        if dontRemoveEverything and len(Chem.GetMolFrags(mol)) <= 1:
            return mol

        modified = False
        for salt in self.salts:
            stripped = _applyPattern(mol, salt, dontRemoveEverything)
            if stripped is mol:
                continue
            mol = stripped
            modified = True
            # Stop as soon as only one fragment is left to protect.
            if dontRemoveEverything and len(Chem.GetMolFrags(mol)) <= 1:
                break

        if modified and mol.GetNumAtoms() > 0:
            Chem.SanitizeMol(mol)
        return mol

    def __call__(self, mol, dontRemoveEverything=False):
        """Strip salts from ``mol``; calling the remover is the same as StripMol.

        >>> remover = SaltRemover(defnData="[Cl,Br]")
        >>> len(remover.salts)
        1

        >>> mol = Chem.MolFromSmiles('CN(C)C.Cl')
        >>> res = remover(mol)
        >>> res is not None
        True
        >>> res.GetNumAtoms()
        4

        """
        return self.StripMol(mol, dontRemoveEverything=dontRemoveEverything)
191
192
193
194
195
196
def _test():
    """Run the doctests of the module currently executing as ``__main__``.

    Returns the ``doctest.testmod`` result, which unpacks as
    ``(failed, attempted)``.
    """
    import doctest
    import sys
    return doctest.testmod(sys.modules["__main__"])
200
201
if __name__ == '__main__':
    # Script entry point: run the doctests and use the number of failing
    # examples as the process exit status (0 means everything passed).
    import sys
    failures, _attempted = _test()
    sys.exit(failures)
206