Package rdkit :: Package Chem :: Module SaltRemover
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.SaltRemover

  1  # $Id$ 
  2  # 
  3  #  Copyright (c) 2010, Novartis Institutes for BioMedical Research Inc. 
  4  #  All rights reserved. 
  5  #  
  6  # Redistribution and use in source and binary forms, with or without 
  7  # modification, are permitted provided that the following conditions are 
  8  # met:  
  9  # 
 10  #     * Redistributions of source code must retain the above copyright  
 11  #       notice, this list of conditions and the following disclaimer. 
 12  #     * Redistributions in binary form must reproduce the above 
 13  #       copyright notice, this list of conditions and the following  
 14  #       disclaimer in the documentation and/or other materials provided  
 15  #       with the distribution. 
 16  #     * Neither the name of Novartis Institutes for BioMedical Research Inc.  
 17  #       nor the names of its contributors may be used to endorse or promote  
 18  #       products derived from this software without specific prior written permission. 
 19  # 
 20  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 21  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 22  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 23  # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 24  # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 25  # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 26  # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 27  # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 28  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 29  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 30  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 31  # 
 32  # Created by Greg Landrum, Dec 2006 
 33  # 
 34   
 35  from rdkit import Chem 
 36  import os,re 
 37   
 38  from rdkit import RDConfig 
 39   
class SaltRemover(object):
    """Strip salt/counter-ion fragments from molecules.

    Salt definitions are SMARTS patterns, one per line.  They are read either
    from the string passed as ``defnData`` or, when that is empty, from the
    file named by ``defnFilename``.  On each line everything after ``//`` is
    ignored and only the first whitespace-separated token is used as the
    SMARTS pattern.

    Instances are callable: ``remover(mol)`` is the same as
    ``remover.StripMol(mol)``.
    """
    # Default definitions file; an instance attribute shadows it when a
    # filename is passed to the constructor.
    defnFilename = os.path.join(RDConfig.RDDataDir, 'Salts.txt')
    # Inline definition text; takes precedence over defnFilename when set.
    defnData = None
    # List of compiled salt query molecules, filled in by _initPatterns().
    salts = None

    def __init__(self, defnFilename=None, defnData=None):
        """Load the salt definitions.

        Arguments:
          - defnFilename: optional path of a definitions file; when empty the
            class-level default is used.
          - defnData: optional string holding the definitions directly; when
            non-empty it is used instead of the file.

        Raises ValueError if a definition line cannot be parsed as SMARTS.
        """
        if defnFilename:
            self.defnFilename = defnFilename
        self.defnData = defnData
        self._initPatterns()

    def _initPatterns(self):
        """Parse the salt definitions into ``self.salts``.

        Raises ValueError (carrying the offending line) when a pattern cannot
        be converted into a query molecule.

        >>> remover = SaltRemover()
        >>> len(remover.salts)>0
        True

        >>> remover = SaltRemover(defnData="[Cl,Br]")
        >>> len(remover.salts)
        1

        """
        if self.defnData:
            # No file-like wrapper is needed just to walk over the lines.
            lines = self.defnData.splitlines()
        else:
            # The context manager guarantees the file handle is released.
            with open(self.defnFilename, 'r') as inF:
                lines = inF.readlines()

        whitespace = re.compile(r'[\t ]+')
        self.salts = []
        for line in lines:
            # Drop surrounding whitespace, then any trailing '//' comment.
            line = line.strip().split('//')[0]
            if not line:
                continue
            smarts = whitespace.split(line)[0]
            try:
                salt = Chem.MolFromSmarts(smarts)
            except Exception:
                import traceback
                traceback.print_exc()
                raise ValueError(line)
            # A failed SMARTS parse is reported by a None result, not by an
            # exception; refuse it here instead of storing an unusable query.
            if salt is None:
                raise ValueError(line)
            self.salts.append(salt)

    def StripMol(self, mol, dontRemoveEverything=False):
        """Return ``mol`` with every fragment matching a salt pattern removed.

        Arguments:
          - mol: the molecule to process; it is not modified.
          - dontRemoveEverything: when true, a deletion that would leave an
            empty molecule is not applied, and processing stops as soon as a
            single fragment remains.

        Returns the original object when nothing was removed, otherwise a new
        molecule (sanitized if it still contains atoms).

        >>> remover = SaltRemover(defnData="[Cl,Br]")
        >>> len(remover.salts)
        1

        >>> mol = Chem.MolFromSmiles('CN(C)C.Cl')
        >>> res = remover.StripMol(mol)
        >>> res is not None
        True
        >>> res.GetNumAtoms()
        4

        Notice that all salts are removed:
        >>> mol = Chem.MolFromSmiles('CN(C)C.Cl.Cl.Br')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        4

        Matching (e.g. "salt-like") atoms in the molecule are unchanged:
        >>> mol = Chem.MolFromSmiles('CN(Br)Cl')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        4

        >>> mol = Chem.MolFromSmiles('CN(Br)Cl.Cl')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        4

        Charged salts are handled reasonably:
        >>> mol = Chem.MolFromSmiles('C[NH+](C)(C).[Cl-]')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        4


        Watch out for this case (everything removed):
        >>> remover = SaltRemover()
        >>> len(remover.salts)>1
        True
        >>> mol = Chem.MolFromSmiles('CC(=O)O.[Na]')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        0

        dontRemoveEverything helps with this by leaving the last salt:
        >>> res = remover.StripMol(mol,dontRemoveEverything=True)
        >>> res.GetNumAtoms()
        4

        but in cases where the last salts are the same, it can't choose
        between them, so it returns all of them:
        >>> mol = Chem.MolFromSmiles('Cl.Cl')
        >>> res = remover.StripMol(mol,dontRemoveEverything=True)
        >>> res.GetNumAtoms()
        2

        """
        def _applyPattern(m, salt, notEverything):
            # Delete matches of `salt` repeatedly until a pass removes no
            # atoms, the molecule is empty, or the next deletion is refused.
            res = m
            nAts = m.GetNumAtoms()
            while nAts:
                t = Chem.DeleteSubstructs(res, salt, True)
                if t is None or (notEverything and t.GetNumAtoms() == 0):
                    # Keep the last acceptable molecule.
                    break
                res = t
                remaining = res.GetNumAtoms()
                if remaining >= nAts:
                    # Nothing was removed on this pass: fixed point reached.
                    break
                nAts = remaining
            return res

        # A single fragment cannot lose a salt without losing everything.
        if dontRemoveEverything and len(Chem.GetMolFrags(mol)) <= 1:
            return mol

        modified = False
        for salt in self.salts:
            tMol = _applyPattern(mol, salt, dontRemoveEverything)
            if tMol is mol:
                continue
            mol = tMol
            modified = True
            if dontRemoveEverything and len(Chem.GetMolFrags(mol)) <= 1:
                break

        # Only a freshly built, non-empty result is sanitized; the caller's
        # molecule is never touched.
        if modified and mol.GetNumAtoms() > 0:
            Chem.SanitizeMol(mol)
        return mol

    def __call__(self, mol, dontRemoveEverything=False):
        """Shorthand for ``StripMol(mol, dontRemoveEverything=...)``.

        >>> remover = SaltRemover(defnData="[Cl,Br]")
        >>> len(remover.salts)
        1

        >>> mol = Chem.MolFromSmiles('CN(C)C.Cl')
        >>> res = remover(mol)
        >>> res is not None
        True
        >>> res.GetNumAtoms()
        4

        """
        return self.StripMol(mol, dontRemoveEverything=dontRemoveEverything)
191 192 193 #------------------------------------ 194 # 195 # doctest boilerplate 196 #
197 -def _test():
198 import doctest,sys 199 return doctest.testmod(sys.modules["__main__"])
if __name__ == '__main__':
    import sys
    # The first item of the doctest result is used as the process exit
    # status, so a clean run exits with 0.
    nFailed, _nTried = _test()
    sys.exit(nFailed)