Package rdkit :: Package VLib :: Package NodeLib :: Module SmilesDupeFilter
[hide private]
[frames | no frames]

Source Code for Module rdkit.VLib.NodeLib.SmilesDupeFilter

 1  #  $Id$ 
 2  # 
 3  #  Copyright (C) 2003 Rational Discovery LLC 
 4  #     All Rights Reserved 
 5  # 
 6  from rdkit import RDConfig 
 7  import sys,os 
 8  from rdkit import Chem 
 9  from rdkit.VLib.Filter import FilterNode 
10   
class DupeFilter(FilterNode):
    """ canonical-smiles based duplicate filter

    A molecule passes the filter the first time the SMILES string produced
    for it by Chem.MolToSmiles is encountered; any later molecule that
    yields the same string is rejected.  The record of SMILES seen so far
    is cleared by reset().

    Assumptions:

      - inputs are molecules


    Sample Usage:
      >>> from rdkit.VLib.NodeLib.SDSupply import SDSupplyNode
      >>> fileN = os.path.join(RDConfig.RDCodeDir,'VLib','NodeLib',\
                               'test_data','NCI_aids.10.sdf')
      >>> suppl = SDSupplyNode(fileN)
      >>> filt = DupeFilter()
      >>> filt.AddParent(suppl)
      >>> ms = [x for x in filt]
      >>> len(ms)
      10
      >>> ms[0].GetProp("_Name")
      '48'
      >>> ms[1].GetProp("_Name")
      '78'
      >>> filt.reset()
      >>> filt.next().GetProp("_Name")
      '48'


    """

    def __init__(self, **kwargs):
        """Set up the node, registering self.filter as the filter function.

        All keyword arguments are forwarded unchanged to FilterNode.__init__.
        """
        FilterNode.__init__(self, func=self.filter, **kwargs)
        # A set (rather than a list) keeps the membership test in filter()
        # constant-time no matter how many molecules have been seen.
        self._smisSeen = set()

    def reset(self):
        """Reset the node and forget every SMILES seen so far."""
        FilterNode.reset(self)
        self._smisSeen = set()

    def filter(self, cmpd):
        """Return 1 if cmpd's SMILES has not been seen before, otherwise 0.

        A SMILES that has not been seen is recorded as a side effect, so a
        second molecule with the same SMILES is rejected.
        """
        smi = Chem.MolToSmiles(cmpd)
        if smi in self._smisSeen:
            return 0
        self._smisSeen.add(smi)
        return 1
54 55 #------------------------------------ 56 # 57 # doctest boilerplate 58 #
def _test():
    """Run the doctests of the module executing as ``__main__``.

    Returns whatever doctest.testmod returns for that module.
    """
    import doctest
    import sys
    main_module = sys.modules["__main__"]
    return doctest.testmod(main_module)


# When executed as a script, run the doctests and use the number of
# failures as the process exit status.
if __name__ == '__main__':
    import sys
    outcome = _test()
    failed, tried = outcome
    sys.exit(failed)