comparison rdkit_descriptors.py @ 1:45b822b9d522

Uploaded
author bgruening
date Sat, 27 Apr 2013 13:15:27 -0400
parents 764340994e71
children
comparison
equal deleted inserted replaced
0:764340994e71 1:45b822b9d522
5 import sys, os, re 5 import sys, os, re
6 import argparse 6 import argparse
7 import inspect 7 import inspect
8 8
9 def get_supplier( infile, format = 'smiles' ): 9 def get_supplier( infile, format = 'smiles' ):
10 """
11 Returns a generator over a SMILES or InChI file. Every element is of RDKit
12 molecule and has its original string as _Name property.
13 """
10 with open(infile) as handle: 14 with open(infile) as handle:
11 for line in handle: 15 for line in handle:
12 line = line.strip() 16 line = line.strip()
13 if format == 'smiles': 17 if format == 'smiles':
14 mol = Chem.MolFromSmiles( line, sanitize=True ) 18 mol = Chem.MolFromSmiles( line, sanitize=True )
20 mol.SetProp( '_Name', line.split('\t')[0] ) 24 mol.SetProp( '_Name', line.split('\t')[0] )
21 yield mol 25 yield mol
22 26
23 27
24 def get_rdkit_descriptor_functions(): 28 def get_rdkit_descriptor_functions():
29 """
30 Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function)
31 """
25 ret = [ (name, f) for name, f in inspect.getmembers( Descriptors ) if inspect.isfunction( f ) and not name.startswith( '_' ) ] 32 ret = [ (name, f) for name, f in inspect.getmembers( Descriptors ) if inspect.isfunction( f ) and not name.startswith( '_' ) ]
26 ret.sort() 33 ret.sort()
27 return ret 34 return ret
28 35
29 36
30 def descriptors( mol, functions ): 37 def descriptors( mol, functions ):
31 """ 38 """
32 Calculates the descriptors of a given molecule. 39 Calculates the descriptors of a given molecule.
33 """ 40 """
34
35 for name, function in functions: 41 for name, function in functions:
36 yield (name, function( mol )) 42 yield (name, function( mol ))
37 43
38 44
39 if __name__ == "__main__": 45 if __name__ == "__main__":
64 70
65 for mol in supplier: 71 for mol in supplier:
66 if not mol: 72 if not mol:
67 continue 73 continue
68 descs = descriptors( mol, functions ) 74 descs = descriptors( mol, functions )
69 name = mol.GetProp("_Name") 75 molecule_id = mol.GetProp("_Name")
70 args.outfile.write( "%s\n" % '\t'.join( [name]+ [str(res) for name, res in descs] ) ) 76 args.outfile.write( "%s\n" % '\t'.join( [molecule_id]+ [str(res) for name, res in descs] ) )
71 77