Package rdkit :: Package Chem :: Module FunctionalGroups
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.FunctionalGroups

  1  # $Id$ 
  2  # 
  3  #  Copyright (c) 2010, Novartis Institutes for BioMedical Research Inc. 
  4  #  All rights reserved. 
  5  #  
  6  # Redistribution and use in source and binary forms, with or without 
  7  # modification, are permitted provided that the following conditions are 
  8  # met:  
  9  # 
 10  #     * Redistributions of source code must retain the above copyright  
 11  #       notice, this list of conditions and the following disclaimer. 
 12  #     * Redistributions in binary form must reproduce the above 
 13  #       copyright notice, this list of conditions and the following  
 14  #       disclaimer in the documentation and/or other materials provided  
 15  #       with the distribution. 
 16  #     * Neither the name of Novartis Institutes for BioMedical Research Inc.  
 17  #       nor the names of its contributors may be used to endorse or promote  
 18  #       products derived from this software without specific prior written permission. 
 19  # 
 20  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 21  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 22  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 23  # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 24  # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 25  # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 26  # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 27  # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 28  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 29  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 30  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 31  # 
 32  # Created by Greg Landrum, October 2006 
 33  # 
 34  import os,weakref,re 
 35  from rdkit.six.moves import cStringIO as StringIO 
 36  from rdkit import RDConfig 
 37   
class FGHierarchyNode(object):
    """One node of the functional-group hierarchy tree.

    Attributes:
      - name: descriptive name of the group
      - label: label of the group as passed to the constructor
      - pattern: the query object passed in as ``patt``
      - smarts: SMARTS text the pattern was built from
      - rxnSmarts: optional reaction SMARTS text
      - children: list of child FGHierarchyNode objects
      - parent: ``None`` for a root node, otherwise a ``weakref.ref`` to the
        parent node (call ``node.parent()`` to dereference it)
      - removalReaction: class-level default of ``None``; never assigned
        in this class

    ``len(node)`` is the size of the subtree rooted at the node (the node
    itself plus all of its descendants).
    """
    # Class-level defaults; __init__ overrides most of them per instance.
    children = None
    name = ""
    label = ""
    pattern = None
    smarts = ""
    rxnSmarts = ""
    parent = None
    removalReaction = None

    def __init__(self, name, patt, smarts="", label="", rxnSmarts="", parent=None):
        """Initialise the node; ``parent`` (if given) is held weakly."""
        self.name = name
        self.pattern = patt
        # Test against None explicitly: a plain truthiness test would go
        # through __len__, which recursively walks the parent's whole subtree
        # every time a child is constructed.
        if parent is not None:
            # weak reference avoids a parent <-> child reference cycle
            self.parent = weakref.ref(parent)
        self.label = label
        self.smarts = smarts
        self.children = []
        self.rxnSmarts = rxnSmarts

    def __len__(self):
        """Return the number of nodes in the subtree rooted here."""
        return 1 + sum(len(child) for child in self.children)
62
class FuncGroupFileParseError(ValueError):
    """Raised when a functional-group definition line cannot be parsed."""

# Module-level state shared with BuildFuncGroupHierarchy():
#   groupDefns   - maps each group label to its FGHierarchyNode
#   hierarchy    - list of root nodes from the most recent build
#   lastData     - the `data` argument recorded by the most recent build
#   lastFilename - the file name recorded by the most recent build
groupDefns = {}
hierarchy = lastData = lastFilename = None
def BuildFuncGroupHierarchy(fileNm=None,data=None,force=False):
    """Parse functional-group definitions and return the list of root nodes.

    Arguments:
      - fileNm: name of a definition file; if neither fileNm nor data is
        given, 'Functional_Group_Hierarchy.txt' in RDConfig.RDDataDir is used
      - data: the definitions as a single string (used only when no fileNm
        is given)
      - force: rebuild even if a cached hierarchy matches the request

    Input format, one definition per line:
      - fields are separated by runs of tabs: label, SMARTS, name and an
        optional reaction SMARTS
      - everything after '//' is ignored, as are empty lines
      - a label of the form 'A.B.C' makes the node a child of 'A.B'; every
        ancestor label must already have been defined

    Returns a list of the top-level FGHierarchyNode objects.

    Raises:
      - FuncGroupFileParseError for a line with fewer than three fields, a
        duplicate label, an undefined ancestor label or an unparsable SMARTS
      - ValueError if there is neither a file name nor data to read

    The module-level cache (groupDefns, hierarchy, lastData, lastFilename)
    is only updated after the whole input has been parsed successfully, and
    it records only the source that was actually read, so a later request
    for a different source is never answered from stale state.
    """
    global groupDefns,hierarchy,lastData,lastFilename
    if not force and hierarchy and (not data or data==lastData) and \
       (not fileNm or fileNm==lastFilename):
        # hand out a copy so callers cannot modify the cached list
        return hierarchy[:]
    from rdkit import Chem

    if not fileNm and not data:
        fileNm = os.path.join(RDConfig.RDDataDir,'Functional_Group_Hierarchy.txt')

    if fileNm:
        # the context manager guarantees the file is closed
        with open(fileNm,'r') as inF:
            lines = inF.readlines()
        usedFilename,usedData = fileNm,None
    elif data:
        lines = StringIO(data).readlines()
        usedFilename,usedData = None,data
    else:
        raise ValueError("need data or filename")

    splitter = re.compile('\t+')
    # build into locals; the globals are committed only on success
    defns = {}
    res = []
    for lineNo,line in enumerate(lines,1):
        line = line.strip()
        line = line.split('//')[0]
        if not line:
            continue
        splitL = splitter.split(line)
        if len(splitL)<3:
            raise FuncGroupFileParseError("Input line %d (%s) is not long enough."%(lineNo,repr(line)))
        label = splitL[0].strip()
        if label in defns:
            raise FuncGroupFileParseError("Duplicate label on line %d."%lineNo)
        labelHierarchy = label.split('.')
        if len(labelHierarchy)>1:
            # every proper prefix of the dotted label must already exist
            for depth in range(1,len(labelHierarchy)):
                tmp = '.'.join(labelHierarchy[:depth])
                if tmp not in defns:
                    raise FuncGroupFileParseError("Hierarchy member %s (line %d) not found."%(tmp,lineNo))
            parent = defns['.'.join(labelHierarchy[:-1])]
        else:
            parent = None
        smarts = splitL[1]
        try:
            patt = Chem.MolFromSmarts(smarts)
        except Exception:
            # report the underlying problem, then fall through to the
            # uniform parse error below
            import traceback
            traceback.print_exc()
            patt = None
        if not patt:
            raise FuncGroupFileParseError('Smarts "%s" (line %d) could not be parsed.'%(smarts,lineNo))

        name = splitL[2].strip()

        rxnSmarts = ''
        if len(splitL)>3:
            rxnSmarts = splitL[3]

        node = FGHierarchyNode(name,patt,smarts=smarts,label=label,parent=parent,rxnSmarts=rxnSmarts)
        # explicit None test: truthiness of a node calls its recursive __len__
        if parent is not None:
            parent.children.append(node)
        else:
            res.append(node)
        defns[label] = node

    groupDefns = defns
    hierarchy = res[:]
    lastData = usedData
    lastFilename = usedFilename
    return res
138
def _SetNodeBits(mol,node,res,idx):
    """Fill in the entries of `res` for `node` and its descendants.

    res[idx] receives the number of substructure matches of node.pattern in
    mol that start on distinct first atoms. Children are only examined when
    the node itself matches; otherwise the entries for the whole subtree are
    skipped and left untouched.

    Returns the index of the first entry after this node's subtree.
    """
    matches = mol.GetSubstructMatches(node.pattern)
    # matches sharing their first atom are counted once
    firstAtoms = set(match[0] for match in matches)
    nHits = len(firstAtoms)
    if not nHits:
        # no match here: skip this node and all of its descendants
        return idx + len(node)
    res[idx] = nHits
    idx += 1
    for child in node.children:
        idx = _SetNodeBits(mol,child,res,idx)
    return idx
155
def CreateMolFingerprint(mol,hierarchy):
    """Return a list of match counts for `mol`, one entry per hierarchy node.

    Arguments:
      - mol: the molecule to be queried (passed through to _SetNodeBits)
      - hierarchy: sequence of root nodes, e.g. the return value of
        BuildFuncGroupHierarchy()

    The list has one entry for every node in the hierarchy; entries start at
    0 and are filled in by _SetNodeBits(), one root subtree after another.
    """
    nEntries = sum(len(root) for root in hierarchy)
    fp = [0]*nEntries
    pos = 0
    for root in hierarchy:
        pos = _SetNodeBits(mol,root,fp,pos)
    return fp
165