1
2
3
4
5
6
7
8
9
10
11 from rdkit import RDConfig
12 from rdkit import Chem
13 import sys,csv
14
def Convert(suppl, outFile, keyCol='', stopAfter=-1, includeChirality=0, smilesFrom=''):
  """ Writes the molecules from a supplier to a CSV file, one row per molecule.

  The header row consists of keyCol (if provided), 'SMILES', and then the
  property names of the first readable molecule (with keyCol removed so that
  it is not duplicated).  Every following row has the same layout; properties
  a molecule does not have are written as empty strings.

  Arguments:
    - suppl: an iterable of molecules.  Entries that evaluate false (e.g.
      None) are skipped.  The supplier is traversed once and is never
      indexed.
    - outFile: a writable file-like object; it is handed to csv.writer.
    - keyCol: (optional) name of a property to write as the first column.
      Each written molecule is read with GetProp(keyCol), so it is expected
      to carry that property.
    - stopAfter: (optional) stop once this many molecules have been written.
      The default of -1 never matches the counter, so everything is written.
    - includeChirality: (optional) passed through as the second positional
      argument of Chem.MolToSmiles.
    - smilesFrom: (optional) name of a property holding a SMILES string.
      When a molecule has it, that SMILES is parsed and regenerated instead
      of generating the SMILES from the molecule itself.  If the property is
      missing or its contents cannot be parsed, the SMILES generated from
      the molecule is used.

  If the supplier yields no readable molecule at all, only the header
  (keyCol and 'SMILES') is written.

  """
  w = csv.writer(outFile)

  def _writeHeader(names):
    header = []
    if keyCol:
      header.append(keyCol)
    header.append('SMILES')
    header.extend(names)
    w.writerow(header)

  # The property columns are taken from the first molecule that could actually
  # be read, so the header is written lazily from inside the loop.
  propNames = None
  nDone = 0
  for mol in suppl:
    if not mol:
      continue

    if propNames is None:
      propNames = list(mol.GetPropNames())
      if keyCol and keyCol in propNames:
        propNames.remove(keyCol)
      _writeHeader(propNames)

    smi = None
    if smilesFrom and mol.HasProp(smilesFrom):
      # round-trip the stored SMILES so that the output form is the one
      # Chem.MolToSmiles produces; tMol is None if the string is unparsable
      tMol = Chem.MolFromSmiles(mol.GetProp(smilesFrom))
      if tMol is not None:
        smi = Chem.MolToSmiles(tMol, includeChirality)
    if smi is None:
      smi = Chem.MolToSmiles(mol, includeChirality)

    outL = []
    if keyCol:
      outL.append(str(mol.GetProp(keyCol)))
    outL.append(smi)
    for prop in propNames:
      if mol.HasProp(prop):
        outL.append(str(mol.GetProp(prop)))
      else:
        outL.append('')
    w.writerow(outL)

    nDone += 1
    if nDone == stopAfter:
      break

  if propNames is None:
    # nothing readable was supplied: still emit the columns we know about
    _writeHeader([])
  return
52
53
54
55
56 import unittest
63 import os
64 from rdkit.six.moves import cStringIO as StringIO
65 fName = os.path.join(RDConfig.RDDataDir,'NCI','first_200.props.sdf')
66 suppl = Chem.SDMolSupplier(fName)
67 io = StringIO()
68 try:
69 Convert(suppl,io)
70 except:
71 import traceback
72 traceback.print_exc()
73 self.fail('conversion failed')
74 txt = io.getvalue()
75 lines = txt.split('\n')
76 if not lines[-1]:
77 del lines[-1]
78 self.assertTrue(len(lines)==201,'bad num lines: %d'%len(lines))
79 line0 = lines[0].split(',')
80 self.assertEqual(len(line0),20)
81 self.assertTrue(line0[0]=='SMILES')
83 import os
84 from rdkit.six.moves import cStringIO as StringIO
85 fName = os.path.join(RDConfig.RDDataDir,'NCI','first_200.props.sdf')
86 suppl = Chem.SDMolSupplier(fName)
87 io = StringIO()
88 try:
89 Convert(suppl,io,keyCol='AMW',stopAfter=5)
90 except:
91 import traceback
92 traceback.print_exc()
93 self.fail('conversion failed')
94 txt = io.getvalue()
95 lines = txt.split('\n')
96 if not lines[-1]:
97 del lines[-1]
98 self.assertTrue(len(lines)==6,'bad num lines: %d'%len(lines))
99 line0 = lines[0].split(',')
100 self.assertEqual(len(line0),20)
101 self.assertTrue(line0[0]=='AMW')
102 self.assertTrue(line0[1]=='SMILES')
103
104
105
106
107
108
109
110
111
113 message = """
114 Usage: SDFToCSV [-k keyCol] inFile.sdf [outFile.csv]
115
116 """
117 sys.stderr.write(message)
118 sys.exit(-1)
119
120
121
if __name__ == '__main__':
  # Command-line driver:
  #   -k keyCol         property to write as the first CSV column
  #   --chiral          include chirality information in the SMILES
  #   --smilesCol=name  take the SMILES from this property when present
  #   --test            run the unit tests instead of converting
  #   -h                show the usage message
  import getopt

  try:
    args, extras = getopt.getopt(sys.argv[1:], 'hk:', [
      'test',
      'chiral',
      'smilesCol=',
    ])
  except getopt.GetoptError:
    # unknown option or missing option argument: report it and show the usage
    import traceback
    traceback.print_exc()
    Usage()

  keyCol = ''
  testIt = 0
  useChirality = 0
  smilesCol = ''
  for arg, val in args:
    if arg == '-k':
      keyCol = val
    elif arg == '--chiral':
      useChirality = 1
    elif arg == '--smilesCol':
      smilesCol = val
    elif arg == '--test':
      testIt = 1
    elif arg == '-h':
      Usage()

  # an input file is required unless we are only running the tests
  if not testIt and len(extras) < 1:
    Usage()

  if not testIt:
    inFilename = extras[0]
    if len(extras) > 1:
      outFilename = extras[1]
      outF = open(outFilename, 'w+')
    else:
      outF = sys.stdout

    try:
      suppl = Chem.SDMolSupplier(inFilename)
      Convert(suppl, outF, keyCol=keyCol, includeChirality=useChirality, smilesFrom=smilesCol)
    finally:
      # close the file we opened ourselves, but leave stdout alone
      if outF is not sys.stdout:
        outF.close()
  else:
    # hide our own options from unittest's argument parsing
    sys.argv = [sys.argv[0]]
    unittest.main()
169