Package rdkit :: Package Chem :: Module SATIS
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.SATIS

  1  # $Id$ 
  2  # 
  3  # Copyright (C) 2001-2006 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """  Functionality for SATIS typing atoms 
 12   
 13  """ 
 14  from __future__ import print_function 
 15  from rdkit import Chem 
 16  from rdkit.six.moves import xrange 
 17   
 18  _debug = 0 
 19   
 20  # 
 21  #  These are SMARTS patterns for the special cases used in 
 22  #  SATIS typing. 
 23  # 
 24  aldehydePatt = Chem.MolFromSmarts('[CD2]=[OD1]') 
 25  ketonePatt = Chem.MolFromSmarts('[CD3]=[OD1]') 
 26  amidePatt = Chem.MolFromSmarts('[CD3](=[OD1])-[#7]') 
 27  esterPatt = Chem.MolFromSmarts('C(=[OD1])-O-[#6]') 
 28  carboxylatePatt = Chem.MolFromSmarts('C(=[OD1])-[OX1]') 
 29  carboxylPatt = Chem.MolFromSmarts('C(=[OD1])-[OX2]') 
 30   
 31  specialCases = ((carboxylatePatt,97), 
 32                  (esterPatt,96), 
 33                  (carboxylPatt,98), 
 34                  (amidePatt,95), 
 35                  (ketonePatt,94), 
 36                  (aldehydePatt,93)) 
 37   
 38   
39 -def SATISTypes(mol,neighborsToInclude=4):
40 """ returns SATIS codes for all atoms in a molecule 41 42 The SATIS definition used is from: 43 J. Chem. Inf. Comput. Sci. _39_ 751-757 (1999) 44 45 each SATIS code is a string consisting of _neighborsToInclude_ + 1 46 2 digit numbers 47 48 **Arguments** 49 50 - mol: a molecule 51 52 - neighborsToInclude (optional): the number of neighbors to include 53 in the SATIS codes 54 55 **Returns** 56 57 a list of strings nAtoms long 58 59 """ 60 global specialCases 61 62 nAtoms = mol.GetNumAtoms() 63 atomicNums = [0]*nAtoms 64 atoms = mol.GetAtoms() 65 for i in xrange(nAtoms): 66 atomicNums[i] = atoms[i].GetAtomicNum() 67 68 nSpecialCases = len(specialCases) 69 specialCaseMatches = [None]*nSpecialCases 70 for i,(patt,idx) in enumerate(specialCases): 71 if mol.HasSubstructMatch(patt): 72 specialCaseMatches[i] = mol.GetSubstructMatches(patt) 73 else: 74 specialCaseMatches[i] = () 75 76 codes = [None]*nAtoms 77 for i in range(nAtoms): 78 code = [99]*(neighborsToInclude+1) 79 atom = atoms[i] 80 atomIdx = atom.GetIdx() 81 code[0] = min(atom.GetAtomicNum(),99) 82 bonds = atom.GetBonds() 83 nBonds = len(bonds) 84 otherIndices = [-1]*nBonds 85 if _debug: print(code[0],end='') 86 for j in range(nBonds): 87 otherIndices[j] = bonds[j].GetOtherAtom(atom).GetIdx() 88 if _debug: print(otherIndices[j],end='') 89 if _debug: print() 90 otherNums = [atomicNums[x] for x in otherIndices] + \ 91 [1]*atom.GetTotalNumHs() 92 otherNums.sort() 93 94 nOthers = len(otherNums) 95 if nOthers > neighborsToInclude: 96 otherNums.reverse() 97 otherNums = otherNums[:neighborsToInclude] 98 otherNums.reverse() 99 for j in range(neighborsToInclude): 100 code[j+1] = min(otherNums[j],99) 101 else: 102 for j in range(nOthers): 103 code[j+1] = min(otherNums[j],99) 104 if nOthers < neighborsToInclude and code[0] in [6,8]: 105 found = 0 106 for j in range(nSpecialCases): 107 for matchTuple in specialCaseMatches[j]: 108 if atomIdx in matchTuple: 109 code[-1] = specialCases[j][1] 110 found = 1 111 break 112 if found: 113 break 114 115 codes[i] = ''.join(['%02d'%(x) for x in code]) 116 return codes
117 118 if __name__ == '__main__': 119 smis = ['CC(=O)NC','CP(F)(Cl)(Br)(O)', 120 'O=CC(=O)C','C(=O)OCC(=O)O','C(=O)[O-]'] 121 for smi in smis: 122 print(smi) 123 m = Chem.MolFromSmiles(smi) 124 codes = SATISTypes(m) 125 print(codes) 126