1
2
3
4
5
6
7
8
9
10
11 """ Import all RDKit chemistry modules
12
13 """
14 from rdkit import rdBase
15 from rdkit import RDConfig
16 from rdkit import DataStructs
17 from rdkit.Geometry import rdGeometry
18 from rdkit.Chem import *
19 from rdkit.Chem.rdPartialCharges import *
20 from rdkit.Chem.rdDepictor import *
21 from rdkit.Chem.rdForceFieldHelpers import *
22 from rdkit.Chem.ChemicalFeatures import *
23 from rdkit.Chem.rdDistGeom import *
24 from rdkit.Chem.rdMolAlign import *
25 from rdkit.Chem.rdMolTransforms import *
26 from rdkit.Chem.rdShapeHelpers import *
27 from rdkit.Chem.rdChemReactions import *
28 try:
29 from rdkit.Chem.rdSLNParse import *
30 except:
31 pass
32 from rdkit.Chem.rdMolDescriptors import *
33 from rdkit.Chem.rdqueries import *
34 from rdkit import ForceField
35 Mol.Compute2DCoords = Compute2DCoords
36 Mol.ComputeGasteigerCharges = ComputeGasteigerCharges
37 import numpy, os
38 from rdkit.RDLogger import logger
39 logger = logger()
40 import warnings
55
57 """ returns a grid representation of the molecule's shape
58 """
59 res = rdGeometry.UniformGrid3D(boxDim[0],boxDim[1],boxDim[2],spacing=spacing)
60 EncodeShape(mol,res,confId,**kwargs)
61 return res
62
64 """ Calculates the volume of a particular conformer of a molecule
65 based on a grid-encoding of the molecular shape.
66
67 """
68 mol = rdchem.Mol(mol)
69 conf = mol.GetConformer(confId)
70 CanonicalizeConformer(conf)
71 box = ComputeConfBox(conf)
72 sideLen = ( box[1].x-box[0].x + 2*boxMargin, \
73 box[1].y-box[0].y + 2*boxMargin, \
74 box[1].z-box[0].z + 2*boxMargin )
75 shape = rdGeometry.UniformGrid3D(sideLen[0],sideLen[1],sideLen[2],
76 spacing=gridSpacing)
77 EncodeShape(mol,shape,confId,ignoreHs=False,vdwScale=1.0)
78 voxelVol = gridSpacing**3
79 occVect = shape.GetOccupancyVect()
80 voxels = [1 for x in occVect if x==3]
81 vol = voxelVol*len(voxels)
82 return vol
83
88 """ Generates a depiction for a molecule where a piece of the molecule
89 is constrained to have the same coordinates as a reference.
90
91 This is useful for, for example, generating depictions of SAR data
92 sets so that the cores of the molecules are all oriented the same
93 way.
94
95 Arguments:
96 - mol: the molecule to be aligned, this will come back
97 with a single conformer.
98 - reference: a molecule with the reference atoms to align to;
99 this should have a depiction.
100 - confId: (optional) the id of the reference conformation to use
101 - referencePattern: (optional) an optional molecule to be used to
102 generate the atom mapping between the molecule
103 and the reference.
104 - acceptFailure: (optional) if True, standard depictions will be generated
105 for molecules that don't have a substructure match to the
106 reference; if False, a ValueError will be raised
107
108 """
109 if reference and referencePattern:
110 if not reference.GetNumAtoms(onlyExplicit=True)==referencePattern.GetNumAtoms(onlyExplicit=True):
111 raise ValueError('When a pattern is provided, it must have the same number of atoms as the reference')
112 referenceMatch = reference.GetSubstructMatch(referencePattern)
113 if not referenceMatch:
114 raise ValueError("Reference does not map to itself")
115 else:
116 referenceMatch = range(reference.GetNumAtoms(onlyExplicit=True))
117 if referencePattern:
118 match = mol.GetSubstructMatch(referencePattern)
119 else:
120 match = mol.GetSubstructMatch(reference)
121
122 if not match:
123 if not acceptFailure:
124 raise ValueError('Substructure match with reference not found.')
125 else:
126 coordMap={}
127 else:
128 conf = reference.GetConformer()
129 coordMap={}
130 for i,idx in enumerate(match):
131 pt3 = conf.GetAtomPosition(referenceMatch[i])
132 pt2 = rdGeometry.Point2D(pt3.x,pt3.y)
133 coordMap[idx] = pt2
134 Compute2DCoords(mol,clearConfs=True,coordMap=coordMap,canonOrient=False)
135
138 """ Generates a depiction for a molecule where a piece of the molecule
139 is constrained to have coordinates similar to those of a 3D reference
140 structure.
141
142 Arguments:
143 - mol: the molecule to be aligned, this will come back
144 with a single conformer.
145 - reference: a molecule with the reference atoms to align to;
146 this should have a depiction.
147 - confId: (optional) the id of the reference conformation to use
148
149 """
150 nAts = mol.GetNumAtoms()
151 dm = []
152 conf = reference.GetConformer(confId)
153 for i in range(nAts):
154 pi = conf.GetAtomPosition(i)
155
156 for j in range(i+1,nAts):
157 pj = conf.GetAtomPosition(j)
158
159 dm.append((pi-pj).Length())
160 dm = numpy.array(dm)
161 Compute2DCoordsMimicDistmat(mol,dm,**kwargs)
162
163 -def GetBestRMS(ref,probe,refConfId=-1,probeConfId=-1,maps=None):
164 """ Returns the optimal RMS for aligning two molecules, taking
165 symmetry into account. As a side-effect, the probe molecule is
166 left in the aligned state.
167
168 Arguments:
169 - ref: the reference molecule
170 - probe: the molecule to be aligned to the reference
171 - refConfId: (optional) reference conformation to use
172 - probeConfId: (optional) probe conformation to use
173 - maps: (optional) a list of lists of (probeAtomId,refAtomId)
174 tuples with the atom-atom mappings of the two molecules.
175 If not provided, these will be generated using a substructure
176 search.
177
178 Note:
179 This function will attempt to align all permutations of matching atom
180 orders in both molecules, for some molecules it will lead to 'combinatorial
181 explosion' especially if hydrogens are present.
182 Use 'rdkit.Chem.AllChem.AlignMol' to align molecules without changing the
183 atom order.
184
185 """
186 if not maps:
187 matches = ref.GetSubstructMatches(probe,uniquify=False)
188 if not matches:
189 raise ValueError('mol %s does not match mol %s'%(ref.GetProp('_Name'),
190 probe.GetProp('_Name')))
191 if len(matches) > 1e6:
192 warnings.warn("{} matches detected for molecule {}, this may lead to a performance slowdown.".format(len(matches), probe.GetProp('_Name')))
193 maps = [list(enumerate(match)) for match in matches]
194 bestRMS=1000.
195 for amap in maps:
196 rms=AlignMol(probe,ref,probeConfId,refConfId,atomMap=amap)
197 if rms<bestRMS:
198 bestRMS=rms
199 bestMap = amap
200
201
202 if bestMap != amap:
203 AlignMol(probe,ref,probeConfId,refConfId,atomMap=bestMap)
204 return bestRMS
205
240
282
284 """ Returns a generator for the virtual library defined by
285 a reaction and a sequence of sidechain sets
286
287 >>> from rdkit import Chem
288 >>> from rdkit.Chem import AllChem
289 >>> s1=[Chem.MolFromSmiles(x) for x in ('NC','NCC')]
290 >>> s2=[Chem.MolFromSmiles(x) for x in ('OC=O','OC(=O)C')]
291 >>> rxn = AllChem.ReactionFromSmarts('[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]')
292 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[s2,s1])
293 >>> [Chem.MolToSmiles(x[0]) for x in list(r)]
294 ['CNC=O', 'CCNC=O', 'CNC(C)=O', 'CCNC(C)=O']
295
296 Note that this is all done in a lazy manner, so "infinitely" large libraries can
297 be done without worrying about running out of memory. Your patience will run out first:
298
299 Define a set of 10000 amines:
300 >>> amines = (Chem.MolFromSmiles('N'+'C'*x) for x in range(10000))
301
302 ... a set of 10000 acids
303 >>> acids = (Chem.MolFromSmiles('OC(=O)'+'C'*x) for x in range(10000))
304
305 ... now the virtual library (1e8 compounds in principle):
306 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[acids,amines])
307
308 ... look at the first 4 compounds:
309 >>> [Chem.MolToSmiles(next(r)[0]) for x in range(4)]
310 ['NC=O', 'CNC=O', 'CCNC=O', 'CCCNC=O']
311
312
313 """
314 if len(sidechainSets) != reaction.GetNumReactantTemplates():
315 raise ValueError('%d sidechains provided, %d required' %
316 (len(sidechainSets),reaction.GetNumReactantTemplates()))
317
318 def _combiEnumerator(items,depth=0):
319 for item in items[depth]:
320 if depth+1 < len(items):
321 v = _combiEnumerator(items,depth+1)
322 for entry in v:
323 l=[item]
324 l.extend(entry)
325 yield l
326 else:
327 yield [item]
328 for chains in _combiEnumerator(sidechainSets):
329 prodSets = reaction.RunReactants(chains)
330 for prods in prodSets:
331 yield prods
332
333 -def ConstrainedEmbed(mol,core,useTethers=True,coreConfId=-1,
334 randomseed=2342,getForceField=UFFGetMoleculeForceField,**kwargs):
335 """ generates an embedding of a molecule where part of the molecule
336 is constrained to have particular coordinates
337
338 Arguments
339 - mol: the molecule to embed
340 - core: the molecule to use as a source of constraints
341 - useTethers: (optional) if True, the final conformation will be
342 optimized subject to a series of extra forces that pull the
343 matching atoms to the positions of the core atoms. Otherwise
344 simple distance constraints based on the core atoms will be
345 used in the optimization.
346 - coreConfId: (optional) id of the core conformation to use
347 - randomSeed: (optional) seed for the random number generator
348
349
350 An example, start by generating a template with a 3D structure:
351 >>> from rdkit.Chem import AllChem
352 >>> template = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1")
353 >>> AllChem.EmbedMolecule(template)
354 0
355 >>> AllChem.UFFOptimizeMolecule(template)
356 0
357
358 Here's a molecule:
359 >>> mol = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1-c3ccccc3")
360
361 Now do the constrained embedding
362 >>> newmol=AllChem.ConstrainedEmbed(mol, template)
363
364 Demonstrate that the positions are the same:
365 >>> newp=newmol.GetConformer().GetAtomPosition(0)
366 >>> molp=mol.GetConformer().GetAtomPosition(0)
367 >>> list(newp-molp)==[0.0,0.0,0.0]
368 True
369 >>> newp=newmol.GetConformer().GetAtomPosition(1)
370 >>> molp=mol.GetConformer().GetAtomPosition(1)
371 >>> list(newp-molp)==[0.0,0.0,0.0]
372 True
373
374 """
375 match = mol.GetSubstructMatch(core)
376 if not match:
377 raise ValueError("molecule doesn't match the core")
378 coordMap={}
379 coreConf = core.GetConformer(coreConfId)
380 for i,idxI in enumerate(match):
381 corePtI = coreConf.GetAtomPosition(i)
382 coordMap[idxI]=corePtI
383
384 ci = EmbedMolecule(mol,coordMap=coordMap,randomSeed=randomseed,**kwargs)
385 if ci<0:
386 raise ValueError('Could not embed molecule.')
387
388 algMap=[(j,i) for i,j in enumerate(match)]
389
390 if not useTethers:
391
392 ff = getForceField(mol,confId=0)
393 for i,idxI in enumerate(match):
394 for j in range(i+1,len(match)):
395 idxJ = match[j]
396 d = coordMap[idxI].Distance(coordMap[idxJ])
397 ff.AddDistanceConstraint(idxI,idxJ,d,d,100.)
398 ff.Initialize()
399 n=4
400 more=ff.Minimize()
401 while more and n:
402 more=ff.Minimize()
403 n-=1
404
405 rms =AlignMol(mol,core,atomMap=algMap)
406 else:
407
408 rms = AlignMol(mol,core,atomMap=algMap)
409 ff = getForceField(mol,confId=0)
410 conf = core.GetConformer()
411 for i in range(core.GetNumAtoms()):
412 p =conf.GetAtomPosition(i)
413 pIdx=ff.AddExtraPoint(p.x,p.y,p.z,fixed=True)-1
414 ff.AddDistanceConstraint(pIdx,match[i],0,0,100.)
415 ff.Initialize()
416 n=4
417 more=ff.Minimize(energyTol=1e-4,forceTol=1e-3)
418 while more and n:
419 more=ff.Minimize(energyTol=1e-4,forceTol=1e-3)
420 n-=1
421
422 rms = AlignMol(mol,core,atomMap=algMap)
423 mol.SetProp('EmbedRMS',str(rms))
424 return mol
425
427 """ assigns bond orders to a molecule based on the
428 bond orders in a template molecule
429
430 Arguments
431 - refmol: the template molecule
432 - mol: the molecule to assign bond orders to
433
434 An example, start by generating a template from a SMILES
435 and read in the PDB structure of the molecule
436 >>> from rdkit.Chem import AllChem
437 >>> template = AllChem.MolFromSmiles("CN1C(=NC(C1=O)(c2ccccc2)c3ccccc3)N")
438 >>> mol = AllChem.MolFromPDBFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4DJU_lig.pdb'))
439 >>> len([1 for b in template.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
440 8
441 >>> len([1 for b in mol.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
442 22
443
444 Now assign the bond orders based on the template molecule
445 >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol)
446 >>> len([1 for b in newMol.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
447 8
448
449 Note that the template molecule should have no explicit hydrogens
450 else the algorithm will fail.
451
452 It also works if there are different formal charges (this was github issue 235):
453 >>> template=AllChem.MolFromSmiles('CN(C)C(=O)Cc1ccc2c(c1)NC(=O)c3ccc(cc3N2)c4ccc(c(c4)OC)[N+](=O)[O-]')
454 >>> mol = AllChem.MolFromMolFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4FTR_lig.mol'))
455 >>> AllChem.MolToSmiles(mol)
456 'COC1CC(C2CCC3C(O)NC4CC(CC(O)N(C)C)CCC4NC3C2)CCC1N(O)O'
457 >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol)
458 >>> AllChem.MolToSmiles(newMol)
459 'COc1cc(-c2ccc3c(c2)Nc2ccc(CC(=O)N(C)C)cc2NC3=O)ccc1[N+](=O)[O-]'
460
461 """
462 refmol2 = rdchem.Mol(refmol)
463 mol2 = rdchem.Mol(mol)
464
465 matching = mol2.GetSubstructMatch(refmol2)
466 if not matching:
467
468 for b in mol2.GetBonds():
469 if b.GetBondType() != BondType.SINGLE:
470 b.SetBondType(BondType.SINGLE)
471 b.SetIsAromatic(False)
472
473 for b in refmol2.GetBonds():
474 b.SetBondType(BondType.SINGLE)
475 b.SetIsAromatic(False)
476
477 for a in refmol2.GetAtoms():
478 a.SetFormalCharge(0)
479 for a in mol2.GetAtoms():
480 a.SetFormalCharge(0)
481
482 matching = mol2.GetSubstructMatches(refmol2, uniquify=False)
483
484 if matching:
485 if len(matching) > 1:
486 logger.warning("More than one matching pattern found - picking one")
487 matching = matching[0]
488
489 for b in refmol.GetBonds():
490 atom1 = matching[b.GetBeginAtomIdx()]
491 atom2 = matching[b.GetEndAtomIdx()]
492 b2 = mol2.GetBondBetweenAtoms(atom1, atom2)
493 b2.SetBondType(b.GetBondType())
494 b2.SetIsAromatic(b.GetIsAromatic())
495
496 for a in refmol.GetAtoms():
497 a2 = mol2.GetAtomWithIdx(matching[a.GetIdx()])
498 a2.SetHybridization(a.GetHybridization())
499 a2.SetIsAromatic(a.GetIsAromatic())
500 a2.SetNumExplicitHs(a.GetNumExplicitHs())
501 a2.SetFormalCharge(a.GetFormalCharge())
502 SanitizeMol(mol2)
503 if hasattr(mol2, '__sssAtoms'):
504 mol2.__sssAtoms = None
505 else:
506 raise ValueError("No matching found")
507 return mol2
508
509
510
511
512
513
515 import doctest,sys
516 return doctest.testmod(sys.modules["__main__"])
517
518
519 if __name__ == '__main__':
520 import sys
521 failed,tried = _test()
522 sys.exit(failed)
523