1
2
3
4
5
6 from rdkit import RDConfig
7 import sys,os
8 from rdkit import Chem
9 from rdkit.VLib.Filter import FilterNode
10
12 """ canonical-smiles based duplicate filter
13
14 Assumptions:
15
16 - inputs are molecules
17
18
19 Sample Usage:
20 >>> from rdkit.VLib.NodeLib.SDSupply import SDSupplyNode
21 >>> fileN = os.path.join(RDConfig.RDCodeDir,'VLib','NodeLib',\
22 'test_data','NCI_aids.10.sdf')
23 >>> suppl = SDSupplyNode(fileN)
24 >>> filt = DupeFilter()
25 >>> filt.AddParent(suppl)
26 >>> ms = [x for x in filt]
27 >>> len(ms)
28 10
29 >>> ms[0].GetProp("_Name")
30 '48'
31 >>> ms[1].GetProp("_Name")
32 '78'
33 >>> filt.reset()
34 >>> filt.next().GetProp("_Name")
35 '48'
36
37
38 """
42
46
48 smi = Chem.MolToSmiles(cmpd)
49 if smi not in self._smisSeen:
50 self._smisSeen.append(smi)
51 return 1
52 else:
53 return 0
54
55
56
57
58
60 import doctest,sys
61 return doctest.testmod(sys.modules["__main__"])
62
if __name__ == '__main__':
    import sys

    # Run the module's doctests when executed as a script.  The helper's
    # result is unpacked as a (failures, attempts) pair, and the failure
    # count is handed to the shell as the process exit status, so a clean
    # run exits with 0.
    outcome = _test()
    failures, _attempted = outcome
    sys.exit(failures)
67