Package rdkit :: Package Chem :: Module AllChem
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.AllChem

  1  # $Id$ 
  2  # 
  3  #  Copyright (C) 2006-2011  greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ Import all RDKit chemistry modules 
 12   
 13  """ 
 14  from rdkit import rdBase 
 15  from rdkit import RDConfig 
 16  from rdkit import DataStructs 
 17  from rdkit.Geometry import rdGeometry 
 18  from rdkit.Chem import * 
 19  from rdkit.Chem.rdPartialCharges import * 
 20  from rdkit.Chem.rdDepictor import * 
 21  from rdkit.Chem.rdForceFieldHelpers import * 
 22  from rdkit.Chem.ChemicalFeatures import * 
 23  from rdkit.Chem.rdDistGeom import * 
 24  from rdkit.Chem.rdMolAlign import * 
 25  from rdkit.Chem.rdMolTransforms import * 
 26  from rdkit.Chem.rdShapeHelpers import * 
 27  from rdkit.Chem.rdChemReactions import * 
 28  try: 
 29    from rdkit.Chem.rdSLNParse import * 
 30  except: 
 31    pass 
 32  from rdkit.Chem.rdMolDescriptors import * 
 33  from rdkit.Chem.rdqueries import * 
 34  from rdkit import ForceField 
 35  Mol.Compute2DCoords = Compute2DCoords 
 36  Mol.ComputeGasteigerCharges = ComputeGasteigerCharges 
 37  import numpy, os 
 38  from rdkit.RDLogger import logger 
 39  logger = logger() 
 40  import warnings 
41 -def TransformMol(mol,tform,confId=-1,keepConfs=False):
42 """ Applies the transformation (usually a 4x4 double matrix) to a molecule 43 if keepConfs is False then all but that conformer are removed 44 45 """ 46 refConf = mol.GetConformer(confId) 47 TransformConformer(refConf,tform) 48 if not keepConfs: 49 if confId==-1: confId=0 50 allConfIds = [c.GetId() for c in mol.GetConformers()] 51 for id in allConfIds: 52 if not id==confId: mol.RemoveConformer(id) 53 #reset the conf Id to zero since there is only one conformer left 54 mol.GetConformer(confId).SetId(0)
55
56 -def ComputeMolShape(mol,confId=-1,boxDim=(20,20,20),spacing=0.5,**kwargs):
57 """ returns a grid representation of the molecule's shape 58 """ 59 res = rdGeometry.UniformGrid3D(boxDim[0],boxDim[1],boxDim[2],spacing=spacing) 60 EncodeShape(mol,res,confId,**kwargs) 61 return res
62
63 -def ComputeMolVolume(mol,confId=-1,gridSpacing=0.2,boxMargin=2.0):
64 """ Calculates the volume of a particular conformer of a molecule 65 based on a grid-encoding of the molecular shape. 66 67 """ 68 mol = rdchem.Mol(mol) 69 conf = mol.GetConformer(confId) 70 CanonicalizeConformer(conf) 71 box = ComputeConfBox(conf) 72 sideLen = ( box[1].x-box[0].x + 2*boxMargin, \ 73 box[1].y-box[0].y + 2*boxMargin, \ 74 box[1].z-box[0].z + 2*boxMargin ) 75 shape = rdGeometry.UniformGrid3D(sideLen[0],sideLen[1],sideLen[2], 76 spacing=gridSpacing) 77 EncodeShape(mol,shape,confId,ignoreHs=False,vdwScale=1.0) 78 voxelVol = gridSpacing**3 79 occVect = shape.GetOccupancyVect() 80 voxels = [1 for x in occVect if x==3] 81 vol = voxelVol*len(voxels) 82 return vol
83
84 -def GenerateDepictionMatching2DStructure(mol,reference,confId=-1, 85 referencePattern=None, 86 acceptFailure=False, 87 **kwargs):
88 """ Generates a depiction for a molecule where a piece of the molecule 89 is constrained to have the same coordinates as a reference. 90 91 This is useful for, for example, generating depictions of SAR data 92 sets so that the cores of the molecules are all oriented the same 93 way. 94 95 Arguments: 96 - mol: the molecule to be aligned, this will come back 97 with a single conformer. 98 - reference: a molecule with the reference atoms to align to; 99 this should have a depiction. 100 - confId: (optional) the id of the reference conformation to use 101 - referencePattern: (optional) an optional molecule to be used to 102 generate the atom mapping between the molecule 103 and the reference. 104 - acceptFailure: (optional) if True, standard depictions will be generated 105 for molecules that don't have a substructure match to the 106 reference; if False, a ValueError will be raised 107 108 """ 109 if reference and referencePattern: 110 if not reference.GetNumAtoms(onlyExplicit=True)==referencePattern.GetNumAtoms(onlyExplicit=True): 111 raise ValueError('When a pattern is provided, it must have the same number of atoms as the reference') 112 referenceMatch = reference.GetSubstructMatch(referencePattern) 113 if not referenceMatch: 114 raise ValueError("Reference does not map to itself") 115 else: 116 referenceMatch = range(reference.GetNumAtoms(onlyExplicit=True)) 117 if referencePattern: 118 match = mol.GetSubstructMatch(referencePattern) 119 else: 120 match = mol.GetSubstructMatch(reference) 121 122 if not match: 123 if not acceptFailure: 124 raise ValueError('Substructure match with reference not found.') 125 else: 126 coordMap={} 127 else: 128 conf = reference.GetConformer() 129 coordMap={} 130 for i,idx in enumerate(match): 131 pt3 = conf.GetAtomPosition(referenceMatch[i]) 132 pt2 = rdGeometry.Point2D(pt3.x,pt3.y) 133 coordMap[idx] = pt2 134 Compute2DCoords(mol,clearConfs=True,coordMap=coordMap,canonOrient=False)
135
136 -def GenerateDepictionMatching3DStructure(mol,reference,confId=-1, 137 **kwargs):
138 """ Generates a depiction for a molecule where a piece of the molecule 139 is constrained to have coordinates similar to those of a 3D reference 140 structure. 141 142 Arguments: 143 - mol: the molecule to be aligned, this will come back 144 with a single conformer. 145 - reference: a molecule with the reference atoms to align to; 146 this should have a depiction. 147 - confId: (optional) the id of the reference conformation to use 148 149 """ 150 nAts = mol.GetNumAtoms() 151 dm = [] 152 conf = reference.GetConformer(confId) 153 for i in range(nAts): 154 pi = conf.GetAtomPosition(i) 155 #npi.z=0 156 for j in range(i+1,nAts): 157 pj = conf.GetAtomPosition(j) 158 #pj.z=0 159 dm.append((pi-pj).Length()) 160 dm = numpy.array(dm) 161 Compute2DCoordsMimicDistmat(mol,dm,**kwargs)
162
163 -def GetBestRMS(ref,probe,refConfId=-1,probeConfId=-1,maps=None):
164 """ Returns the optimal RMS for aligning two molecules, taking 165 symmetry into account. As a side-effect, the probe molecule is 166 left in the aligned state. 167 168 Arguments: 169 - ref: the reference molecule 170 - probe: the molecule to be aligned to the reference 171 - refConfId: (optional) reference conformation to use 172 - probeConfId: (optional) probe conformation to use 173 - maps: (optional) a list of lists of (probeAtomId,refAtomId) 174 tuples with the atom-atom mappings of the two molecules. 175 If not provided, these will be generated using a substructure 176 search. 177 178 Note: 179 This function will attempt to align all permutations of matching atom 180 orders in both molecules, for some molecules it will lead to 'combinatorial 181 explosion' especially if hydrogens are present. 182 Use 'rdkit.Chem.AllChem.AlignMol' to align molecules without changing the 183 atom order. 184 185 """ 186 if not maps: 187 matches = ref.GetSubstructMatches(probe,uniquify=False) 188 if not matches: 189 raise ValueError('mol %s does not match mol %s'%(ref.GetProp('_Name'), 190 probe.GetProp('_Name'))) 191 if len(matches) > 1e6: 192 warnings.warn("{} matches detected for molecule {}, this may lead to a performance slowdown.".format(len(matches), probe.GetProp('_Name'))) 193 maps = [list(enumerate(match)) for match in matches] 194 bestRMS=1000. 195 for amap in maps: 196 rms=AlignMol(probe,ref,probeConfId,refConfId,atomMap=amap) 197 if rms<bestRMS: 198 bestRMS=rms 199 bestMap = amap 200 201 # finally repeate the best alignment : 202 if bestMap != amap: 203 AlignMol(probe,ref,probeConfId,refConfId,atomMap=bestMap) 204 return bestRMS
205
206 -def GetConformerRMS(mol,confId1,confId2,atomIds=None,prealigned=False):
207 """ Returns the RMS between two conformations. 208 By default, the conformers will be aligned to the first conformer 209 of the molecule (i.e. the reference) before RMS calculation and, 210 as a side-effect, will be left in the aligned state. 211 212 Arguments: 213 - mol: the molecule 214 - confId1: the id of the first conformer 215 - confId2: the id of the second conformer 216 - atomIds: (optional) list of atom ids to use a points for 217 alingment - defaults to all atoms 218 - prealigned: (optional) by default the conformers are assumed 219 be unaligned and will therefore be aligned to the 220 first conformer 221 222 """ 223 # align the conformers if necessary 224 # Note: the reference conformer is always the first one 225 if not prealigned: 226 if atomIds: 227 AlignMolConformers(mol, confIds=[confId1,confId2], atomIds=atomIds) 228 else: 229 AlignMolConformers(mol, confIds=[confId1,confId2]) 230 231 # calculate the RMS between the two conformations 232 conf1 = mol.GetConformer(id=confId1) 233 conf2 = mol.GetConformer(id=confId2) 234 ssr = 0 235 for i in range(mol.GetNumAtoms()): 236 d = conf1.GetAtomPosition(i).Distance(conf2.GetAtomPosition(i)) 237 ssr += d*d 238 ssr /= mol.GetNumAtoms() 239 return numpy.sqrt(ssr)
240
241 -def GetConformerRMSMatrix(mol,atomIds=None,prealigned=False):
242 """ Returns the RMS matrix of the conformers of a molecule. 243 As a side-effect, the conformers will be aligned to the first 244 conformer (i.e. the reference) and will left in the aligned state. 245 246 Arguments: 247 - mol: the molecule 248 - atomIds: (optional) list of atom ids to use a points for 249 alingment - defaults to all atoms 250 - prealigned: (optional) by default the conformers are assumed 251 be unaligned and will therefore be aligned to the 252 first conformer 253 254 Note that the returned RMS matrix is symmetrically, i.e. it is the 255 lower half of the matrix, e.g. for 5 conformers: 256 rmsmatrix = [ a, 257 b, c, 258 d, e, f, 259 g, h, i, j] 260 This way it can be directly used as distance matrix in e.g. Butina 261 clustering. 262 263 """ 264 # if necessary, align the conformers 265 # Note: the reference conformer is always the first one 266 rmsvals = [] 267 if not prealigned: 268 if atomIds: 269 AlignMolConformers(mol, atomIds=atomIds, RMSlist=rmsvals) 270 else: 271 AlignMolConformers(mol, RMSlist=rmsvals) 272 else: # already prealigned 273 for i in range(1, mol.GetNumConformers()): 274 rmsvals.append(GetConformerRMS(mol, 0, i, atomIds=atomIds, prealigned=prealigned)) 275 # loop over the conformations (except the reference one) 276 cmat = [] 277 for i in range(1, mol.GetNumConformers()): 278 cmat.append(rmsvals[i-1]) 279 for j in range(1,i): 280 cmat.append(GetConformerRMS(mol, i, j, atomIds=atomIds, prealigned=True)) 281 return cmat
282
283 -def EnumerateLibraryFromReaction(reaction,sidechainSets) :
284 """ Returns a generator for the virtual library defined by 285 a reaction and a sequence of sidechain sets 286 287 >>> from rdkit import Chem 288 >>> from rdkit.Chem import AllChem 289 >>> s1=[Chem.MolFromSmiles(x) for x in ('NC','NCC')] 290 >>> s2=[Chem.MolFromSmiles(x) for x in ('OC=O','OC(=O)C')] 291 >>> rxn = AllChem.ReactionFromSmarts('[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]') 292 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[s2,s1]) 293 >>> [Chem.MolToSmiles(x[0]) for x in list(r)] 294 ['CNC=O', 'CCNC=O', 'CNC(C)=O', 'CCNC(C)=O'] 295 296 Note that this is all done in a lazy manner, so "infinitely" large libraries can 297 be done without worrying about running out of memory. Your patience will run out first: 298 299 Define a set of 10000 amines: 300 >>> amines = (Chem.MolFromSmiles('N'+'C'*x) for x in range(10000)) 301 302 ... a set of 10000 acids 303 >>> acids = (Chem.MolFromSmiles('OC(=O)'+'C'*x) for x in range(10000)) 304 305 ... now the virtual library (1e8 compounds in principle): 306 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[acids,amines]) 307 308 ... look at the first 4 compounds: 309 >>> [Chem.MolToSmiles(next(r)[0]) for x in range(4)] 310 ['NC=O', 'CNC=O', 'CCNC=O', 'CCCNC=O'] 311 312 313 """ 314 if len(sidechainSets) != reaction.GetNumReactantTemplates(): 315 raise ValueError('%d sidechains provided, %d required' % 316 (len(sidechainSets),reaction.GetNumReactantTemplates())) 317 318 def _combiEnumerator(items,depth=0): 319 for item in items[depth]: 320 if depth+1 < len(items): 321 v = _combiEnumerator(items,depth+1) 322 for entry in v: 323 l=[item] 324 l.extend(entry) 325 yield l 326 else: 327 yield [item]
328 for chains in _combiEnumerator(sidechainSets): 329 prodSets = reaction.RunReactants(chains) 330 for prods in prodSets: 331 yield prods 332
333 -def ConstrainedEmbed(mol,core,useTethers=True,coreConfId=-1, 334 randomseed=2342,getForceField=UFFGetMoleculeForceField,**kwargs):
335 """ generates an embedding of a molecule where part of the molecule 336 is constrained to have particular coordinates 337 338 Arguments 339 - mol: the molecule to embed 340 - core: the molecule to use as a source of constraints 341 - useTethers: (optional) if True, the final conformation will be 342 optimized subject to a series of extra forces that pull the 343 matching atoms to the positions of the core atoms. Otherwise 344 simple distance constraints based on the core atoms will be 345 used in the optimization. 346 - coreConfId: (optional) id of the core conformation to use 347 - randomSeed: (optional) seed for the random number generator 348 349 350 An example, start by generating a template with a 3D structure: 351 >>> from rdkit.Chem import AllChem 352 >>> template = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1") 353 >>> AllChem.EmbedMolecule(template) 354 0 355 >>> AllChem.UFFOptimizeMolecule(template) 356 0 357 358 Here's a molecule: 359 >>> mol = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1-c3ccccc3") 360 361 Now do the constrained embedding 362 >>> newmol=AllChem.ConstrainedEmbed(mol, template) 363 364 Demonstrate that the positions are the same: 365 >>> newp=newmol.GetConformer().GetAtomPosition(0) 366 >>> molp=mol.GetConformer().GetAtomPosition(0) 367 >>> list(newp-molp)==[0.0,0.0,0.0] 368 True 369 >>> newp=newmol.GetConformer().GetAtomPosition(1) 370 >>> molp=mol.GetConformer().GetAtomPosition(1) 371 >>> list(newp-molp)==[0.0,0.0,0.0] 372 True 373 374 """ 375 match = mol.GetSubstructMatch(core) 376 if not match: 377 raise ValueError("molecule doesn't match the core") 378 coordMap={} 379 coreConf = core.GetConformer(coreConfId) 380 for i,idxI in enumerate(match): 381 corePtI = coreConf.GetAtomPosition(i) 382 coordMap[idxI]=corePtI 383 384 ci = EmbedMolecule(mol,coordMap=coordMap,randomSeed=randomseed,**kwargs) 385 if ci<0: 386 raise ValueError('Could not embed molecule.') 387 388 algMap=[(j,i) for i,j in enumerate(match)] 389 390 if not useTethers: 391 # clean up the conformation 392 ff = getForceField(mol,confId=0) 393 for i,idxI in enumerate(match): 394 for j in range(i+1,len(match)): 395 idxJ = match[j] 396 d = coordMap[idxI].Distance(coordMap[idxJ]) 397 ff.AddDistanceConstraint(idxI,idxJ,d,d,100.) 398 ff.Initialize() 399 n=4 400 more=ff.Minimize() 401 while more and n: 402 more=ff.Minimize() 403 n-=1 404 # rotate the embedded conformation onto the core: 405 rms =AlignMol(mol,core,atomMap=algMap) 406 else: 407 # rotate the embedded conformation onto the core: 408 rms = AlignMol(mol,core,atomMap=algMap) 409 ff = getForceField(mol,confId=0) 410 conf = core.GetConformer() 411 for i in range(core.GetNumAtoms()): 412 p =conf.GetAtomPosition(i) 413 pIdx=ff.AddExtraPoint(p.x,p.y,p.z,fixed=True)-1 414 ff.AddDistanceConstraint(pIdx,match[i],0,0,100.) 415 ff.Initialize() 416 n=4 417 more=ff.Minimize(energyTol=1e-4,forceTol=1e-3) 418 while more and n: 419 more=ff.Minimize(energyTol=1e-4,forceTol=1e-3) 420 n-=1 421 # realign 422 rms = AlignMol(mol,core,atomMap=algMap) 423 mol.SetProp('EmbedRMS',str(rms)) 424 return mol
425
426 -def AssignBondOrdersFromTemplate(refmol, mol):
427 """ assigns bond orders to a molecule based on the 428 bond orders in a template molecule 429 430 Arguments 431 - refmol: the template molecule 432 - mol: the molecule to assign bond orders to 433 434 An example, start by generating a template from a SMILES 435 and read in the PDB structure of the molecule 436 >>> from rdkit.Chem import AllChem 437 >>> template = AllChem.MolFromSmiles("CN1C(=NC(C1=O)(c2ccccc2)c3ccccc3)N") 438 >>> mol = AllChem.MolFromPDBFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4DJU_lig.pdb')) 439 >>> len([1 for b in template.GetBonds() if b.GetBondTypeAsDouble() == 1.0]) 440 8 441 >>> len([1 for b in mol.GetBonds() if b.GetBondTypeAsDouble() == 1.0]) 442 22 443 444 Now assign the bond orders based on the template molecule 445 >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol) 446 >>> len([1 for b in newMol.GetBonds() if b.GetBondTypeAsDouble() == 1.0]) 447 8 448 449 Note that the template molecule should have no explicit hydrogens 450 else the algorithm will fail. 451 452 It also works if there are different formal charges (this was github issue 235): 453 >>> template=AllChem.MolFromSmiles('CN(C)C(=O)Cc1ccc2c(c1)NC(=O)c3ccc(cc3N2)c4ccc(c(c4)OC)[N+](=O)[O-]') 454 >>> mol = AllChem.MolFromMolFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4FTR_lig.mol')) 455 >>> AllChem.MolToSmiles(mol) 456 'COC1CC(C2CCC3C(O)NC4CC(CC(O)N(C)C)CCC4NC3C2)CCC1N(O)O' 457 >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol) 458 >>> AllChem.MolToSmiles(newMol) 459 'COc1cc(-c2ccc3c(c2)Nc2ccc(CC(=O)N(C)C)cc2NC3=O)ccc1[N+](=O)[O-]' 460 461 """ 462 refmol2 = rdchem.Mol(refmol) 463 mol2 = rdchem.Mol(mol) 464 # do the molecules match already? 465 matching = mol2.GetSubstructMatch(refmol2) 466 if not matching: # no, they don't match 467 # check if bonds of mol are SINGLE 468 for b in mol2.GetBonds(): 469 if b.GetBondType() != BondType.SINGLE: 470 b.SetBondType(BondType.SINGLE) 471 b.SetIsAromatic(False) 472 # set the bonds of mol to SINGLE 473 for b in refmol2.GetBonds(): 474 b.SetBondType(BondType.SINGLE) 475 b.SetIsAromatic(False) 476 # set atom charges to zero; 477 for a in refmol2.GetAtoms(): 478 a.SetFormalCharge(0) 479 for a in mol2.GetAtoms(): 480 a.SetFormalCharge(0) 481 482 matching = mol2.GetSubstructMatches(refmol2, uniquify=False) 483 # do the molecules match now? 484 if matching: 485 if len(matching) > 1: 486 logger.warning("More than one matching pattern found - picking one") 487 matching = matching[0] 488 # apply matching: set bond properties 489 for b in refmol.GetBonds(): 490 atom1 = matching[b.GetBeginAtomIdx()] 491 atom2 = matching[b.GetEndAtomIdx()] 492 b2 = mol2.GetBondBetweenAtoms(atom1, atom2) 493 b2.SetBondType(b.GetBondType()) 494 b2.SetIsAromatic(b.GetIsAromatic()) 495 # apply matching: set atom properties 496 for a in refmol.GetAtoms(): 497 a2 = mol2.GetAtomWithIdx(matching[a.GetIdx()]) 498 a2.SetHybridization(a.GetHybridization()) 499 a2.SetIsAromatic(a.GetIsAromatic()) 500 a2.SetNumExplicitHs(a.GetNumExplicitHs()) 501 a2.SetFormalCharge(a.GetFormalCharge()) 502 SanitizeMol(mol2) 503 if hasattr(mol2, '__sssAtoms'): 504 mol2.__sssAtoms = None # we don't want all bonds highlighted 505 else: 506 raise ValueError("No matching found") 507 return mol2
508 509 510 #------------------------------------ 511 # 512 # doctest boilerplate 513 #
514 -def _test():
515 import doctest,sys 516 return doctest.testmod(sys.modules["__main__"])
517 518 519 if __name__ == '__main__': 520 import sys 521 failed,tried = _test() 522 sys.exit(failed) 523