1
2
3
4
5
6 from rdkit import RDConfig
7 import sys,os,types
8 from rdkit import Chem
9 from rdkit.VLib.Transform import TransformNode
10
12 """ transforms molecules by removing atoms matching smarts patterns
13
14 Assumptions:
15
16 - inputs are molecules
17
18
19 Sample Usage:
20 >>> smis = ['C1CCC1.C=O','C1CCC1C=O','CCC=O.C=O','NCC=O.C=O.CN']
21 >>> mols = [Chem.MolFromSmiles(x) for x in smis]
22 >>> from rdkit.VLib.Supply import SupplyNode
23 >>> suppl = SupplyNode(contents=mols)
24 >>> ms = [x for x in suppl]
25 >>> len(ms)
26 4
27
28 We can pass in SMARTS strings:
29 >>> smas = ['C=O','CN']
30 >>> tform = SmartsRemover(patterns=smas)
31 >>> tform.AddParent(suppl)
32 >>> ms = [x for x in tform]
33 >>> len(ms)
34 4
35 >>> Chem.MolToSmiles(ms[0])
36 'C1CCC1'
37 >>> Chem.MolToSmiles(ms[1])
38 'O=CC1CCC1'
39 >>> Chem.MolToSmiles(ms[2])
40 'CCC=O'
41 >>> Chem.MolToSmiles(ms[3])
42 'NCC=O'
43
44 We can also remove pieces of the molecule that are not complete
45 fragments:
46 >>> tform.Destroy()
47 >>> smas = ['C=O','CN']
48 >>> smas = [Chem.MolFromSmarts(x) for x in smas]
49 >>> tform = SmartsRemover(patterns=smas,wholeFragments=0)
50 >>> tform.AddParent(suppl)
51 >>> ms = [x for x in tform]
52 >>> len(ms)
53 4
54 >>> Chem.MolToSmiles(ms[0])
55 'C1CCC1'
56 >>> Chem.MolToSmiles(ms[1])
57 'C1CCC1'
58 >>> Chem.MolToSmiles(ms[3])
59 ''
60
61 Or patterns themselves:
62 >>> tform.Destroy()
63 >>> smas = ['C=O','CN']
64 >>> smas = [Chem.MolFromSmarts(x) for x in smas]
65 >>> tform = SmartsRemover(patterns=smas)
66 >>> tform.AddParent(suppl)
67 >>> ms = [x for x in tform]
68 >>> len(ms)
69 4
70 >>> Chem.MolToSmiles(ms[0])
71 'C1CCC1'
72 >>> Chem.MolToSmiles(ms[3])
73 'NCC=O'
74
75
76 """
def __init__(self, patterns=[], wholeFragments=1, **kwargs):
    """Compile the removal patterns and store them on the node.

    Arguments:
      - patterns: sequence of patterns.  Each entry is either a SMARTS
        string, which is compiled with Chem.MolFromSmarts, or any other
        object (e.g. an already-built query molecule), which is stored
        unchanged.  The default list is never mutated here.
      - wholeFragments: accepted for interface compatibility; not read
        in this body (see the review note below).
      - **kwargs: accepted for interface compatibility; not read in this
        body (see the review note below).

    Raises:
      ValueError: if a SMARTS string cannot be parsed.

    On return, self._patterns is a tuple holding the compiled patterns
    in input order.
    """
    # NOTE(review): wholeFragments and kwargs are not consumed anywhere in
    # the code visible here -- presumably they are stored / forwarded to
    # the base node elsewhere; confirm before relying on this method alone.

    # types.StringTypes only exists on Python 2; fall back to (str,) so
    # the string test also works on Python 3.  isinstance (rather than an
    # exact type() match) additionally accepts str subclasses.
    stringTypes = getattr(types, 'StringTypes', (str,))

    compiled = []
    for patt in patterns:
        if isinstance(patt, stringTypes):
            query = Chem.MolFromSmarts(patt)
            if not query:
                raise ValueError('bad smarts: %s' % (patt))
            patt = query
        compiled.append(patt)
    self._patterns = tuple(compiled)
94
102
# Additional doctest scenario kept outside the class docstring.  It builds
# five molecules (several containing counter-ions), checks atom counts
# while deleting salt-like SMARTS matches from one of them step by step
# with Chem.DeleteSubstructs, and finally pushes the same SMARTS strings
# through a SmartsRemover node fed by a SupplyNode.
biggerTest="""
>>> smis = ['CCOC','CCO.Cl','CC(=O)[O-].[Na+]','OCC','C[N+](C)(C)C.[Cl-]']
>>> mols = [Chem.MolFromSmiles(x) for x in smis]
>>> from rdkit.VLib.Supply import SupplyNode
>>> suppl = SupplyNode(contents=mols)
>>> ms = [x for x in suppl]
>>> len(ms)
5

#>>> salts = ['[Cl;H1&X1,-]','[Na+]','[O;H2,H1&-,X0&-2]']

>>> salts = ['[Cl;H1&X1,-]','[Na+]','[O;H2,H1&-,X0&-2]']
>>> m = mols[2]
>>> m.GetNumAtoms()
5
>>> patts = [Chem.MolFromSmarts(x) for x in salts]
>>> m2 = Chem.DeleteSubstructs(m,patts[0],1)
>>> m2.GetNumAtoms()
5
>>> m2 = Chem.DeleteSubstructs(m2,patts[1],1)
>>> m2.GetNumAtoms()
4
>>> m2 = Chem.DeleteSubstructs(m2,patts[2],1)
>>> m2.GetNumAtoms()
4

>>> tform = SmartsRemover(patterns=salts)
>>> tform.AddParent(suppl)
>>> ms = [x for x in tform]
>>> len(ms)
5

"""





# doctest.testmod also runs the strings found in a module-level __test__
# dict; this registers the scenario above under the name 'bigger'.
__test__={'bigger':biggerTest}
def _test():
    """Run the doctests of the module currently registered as "__main__".

    Returns:
      whatever doctest.testmod returns; the script entry point unpacks
      it as a (failed, tried) pair.
    """
    # Imported locally so the doctest machinery is only loaded when the
    # tests are actually run.
    import doctest
    import sys
    return doctest.testmod(sys.modules["__main__"])
145
146
if __name__ == '__main__':
    # Script entry point: run the module's tests and report the outcome
    # to the shell through the process exit status.
    import sys

    # _test() yields a two-item result: failures first, attempts second.
    nFailed, nTried = _test()

    # A non-zero failure count becomes a non-zero exit code.
    sys.exit(nFailed)
151