Package rdkit :: Package VLib :: Package NodeLib :: Module SmartsMolFilter
[hide private]
[frames | no frames]

Source Code for Module rdkit.VLib.NodeLib.SmartsMolFilter

  1  #  $Id$ 
  2  # 
  3  #  Copyright (C) 2003 Rational Discovery LLC 
  4  #     All Rights Reserved 
  5  # 
  6  from rdkit import RDConfig 
  7  import sys,os,types 
  8  from rdkit import Chem 
  9  from rdkit.VLib.Filter import FilterNode 
 10   
class SmartsFilter(FilterNode):
    """ filter out molecules matching one or more SMARTS patterns

    There is a count associated with each pattern. Molecules are
    allowed to match the pattern up to this number of times.

    Implementation note: `filter()` returns 1 as soon as any pattern has
    at least `count` substructure matches in the molecule, and 0 otherwise.
    How that flag (and negation) is turned into pass/reject is decided by
    the FilterNode base class, not by this class.

    Assumptions:

      - inputs are molecules


    Sample Usage:
      >>> smis = ['C1CCC1','C1CCC1C=O','CCCC','CCC=O','CC(=O)C','CCN','NCCN','NCC=O']
      >>> mols = [Chem.MolFromSmiles(x) for x in smis]
      >>> from rdkit.VLib.Supply import SupplyNode
      >>> suppl = SupplyNode(contents=mols)
      >>> ms = [x for x in suppl]
      >>> len(ms)
      8

    We can pass in SMARTS strings:
      >>> smas = ['C=O','CN']
      >>> counts = [1,2]
      >>> filt = SmartsFilter(patterns=smas,counts=counts)
      >>> filt.AddParent(suppl)
      >>> ms = [x for x in filt]
      >>> len(ms)
      5

    Alternatively, we can pass in molecule objects:
      >>> mols =[Chem.MolFromSmarts(x) for x in smas]
      >>> counts = [1,2]
      >>> filt.Destroy()
      >>> filt = SmartsFilter(patterns=mols,counts=counts)
      >>> filt.AddParent(suppl)
      >>> ms = [x for x in filt]
      >>> len(ms)
      5

    Negation does what you'd expect:
      >>> filt.SetNegate(1)
      >>> ms = [x for x in filt]
      >>> len(ms)
      3


    """

    def __init__(self, patterns=[], counts=[], **kwargs):
        """ Build the node and compile its patterns.

        Arguments:
          - patterns: sequence of SMARTS strings and/or query molecules
          - counts: optional sequence of match-count thresholds, one per
            pattern; when empty, every pattern gets a threshold of 1
          - kwargs: passed through unchanged to FilterNode.__init__

        Raises ValueError if counts is non-empty but differs in length from
        patterns, or if a SMARTS string cannot be parsed.
        """
        # The list defaults are kept for interface compatibility; they are
        # only ever read (never mutated), so sharing them between calls is safe.
        FilterNode.__init__(self, func=self.filter, **kwargs)
        self._initPatterns(patterns, counts)

    def _initPatterns(self, patterns, counts):
        """ Store (query molecule, count) pairs in self._patterns.

        String entries of patterns are converted with Chem.MolFromSmarts;
        any other entry is stored as given.
        """
        nPatts = len(patterns)
        if len(counts) and len(counts) != nPatts:
            raise ValueError('if counts is specified, it must match patterns in length')
        if not len(counts):
            counts = [1] * nPatts

        # types.StringTypes only exists on Python 2; fall back to plain str
        # so the string check keeps working on Python 3 as well.
        stringTypes = getattr(types, 'StringTypes', (str,))

        targets = []
        for p, c in zip(patterns, counts):
            if isinstance(p, stringTypes):
                m = Chem.MolFromSmarts(p)
                if not m:
                    raise ValueError('bad smarts: %s' % (p))
                p = m
            targets.append((p, c))
        self._patterns = tuple(targets)

    def filter(self, cmpd):
        """ Return 1 if cmpd hits any pattern at least `count` times, else 0.

        The negate flag is not consulted here; presumably FilterNode applies
        it to this result -- confirm against FilterNode.
        """
        for patt, count in self._patterns:
            nMatches = len(cmpd.GetSubstructMatches(patt))
            if nMatches >= count:
                # this query is an or, so we short circuit true:
                return 1
        return 0

#------------------------------------
#
#  doctest boilerplate
#
97 -def _test():
98 import doctest,sys 99 return doctest.testmod(sys.modules["__main__"])
100 101 102 if __name__ == '__main__': 103 import sys 104 failed,tried = _test() 105 sys.exit(failed) 106