|
0
|
1 #!/usr/bin/env python
|
|
|
2
|
|
|
3 """
|
|
|
4 Converts an SD-file to a GSPAN file.
|
|
|
5 """
|
|
|
6
|
|
1
|
7 import os
|
|
|
8 import sys
|
|
0
|
9 import argparse
|
|
1
|
10 import openbabel
|
|
|
11 import pybel
|
|
0
|
12
|
|
|
def _bond_label(bond):
    """
        Return the gSpan edge label for an Open Babel bond.

        'a' = aromatic, 's' = single, 'd' = double, 't' = triple. Aromaticity
        is tested first, so an aromatic bond is always labelled 'a'.
    """
    if bond.IsAromatic():
        return 'a'
    if bond.IsSingle():
        return 's'
    if bond.IsDouble():
        return 'd'
    if bond.IsTriple():
        return 't'
    # None of the predicates matched. Emit the numeric bond order rather than
    # silently reusing whatever label was computed for a previous bond or atom.
    return str(bond.GetBondOrder())


def main( args ):
    """
        Write every molecule of the given input files to args.outfile in
        gSpan format.

        args is the parsed command line and must provide:
          infile  -- iterable of input file paths (None is treated as empty)
          format  -- input format name, or a false value to guess it from
                     each file's extension
          outfile -- writable file-like object

        For each molecule one 't # id <title>' line is written, followed by
        one 'v <index> <atomic number>' line per atom and one
        'e <begin index> <end index> <label>' line per bond.

        Exits via sys.exit() when no format was given and the file extension
        is not one of smi, sdf, inchi or mol.
    """
    out = args.outfile

    # argparse leaves infile as None when -i/--infile is omitted.
    for infile in args.infile or ():
        file_extension = args.format or os.path.splitext( infile )[-1].lstrip('.')

        if not args.format and file_extension not in ['smi', 'sdf', 'inchi', 'mol']:
            sys.exit('Could not guess the format from the file extension please specify with the --format option.')

        for mol in pybel.readfile(file_extension, infile):
            out.write( 't # id %s\n' % mol.title.strip() )

            for atom in openbabel.OBMolAtomIter( mol.OBMol ):
                # Vertices are labelled with the atomic number of the atom.
                out.write('v %s %s\n' % (atom.GetIdx(), atom.GetAtomicNum()))

            for bond in openbabel.OBMolBondIter( mol.OBMol ):
                src_index = bond.GetBeginAtomIdx()
                dest_index = bond.GetEndAtomIdx()
                # Sanity check: both ends of a bond must refer to a real atom.
                assert src_index > 0
                assert dest_index > 0
                out.write('e %s %s %s\n' % (src_index, dest_index, _bond_label(bond)))
|
|
|
45
|
|
|
46
|
|
0
|
47
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser()
    # Without required=True an omitted -i leaves args.infile as None, which
    # main() would then try to iterate over.
    parser.add_argument('-i', '--infile', nargs='*', required=True,
        help="Specify one or more input files")
    # When no format is given, main() guesses it from each file's extension.
    parser.add_argument('-f', '--format',
        help="Format of the input file.")
    parser.add_argument('--outfile', type=argparse.FileType('w'),
        default=sys.stdout, help="Specify one output file")
    args = parser.parse_args()
    main( args )
|