Mercurial > repos > bgruening > ctb_rdkit_descriptors
comparison rdkit_descriptors.py @ 8:2d051db1f561 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
| author | bgruening |
|---|---|
| date | Wed, 17 Feb 2021 12:58:03 +0000 |
| parents | 81233a9053f5 |
| children | 87e62bbb4901 |
comparison
equal
deleted
inserted
replaced
| 7:58520ccba184 | 8:2d051db1f561 |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 | 2 |
| 3 from rdkit.Chem import Descriptors | |
| 4 from rdkit import Chem | |
| 5 import sys, os, re | |
| 6 import argparse | 3 import argparse |
| 7 import inspect | 4 import inspect |
| | 5 import sys |
| 8 | 6 |
| 9 def get_supplier( infile, format = 'smiles' ): | 7 from rdkit import Chem |
| | 8 from rdkit.Chem import Descriptors |
| | 9 |
| | 10 |
| | 11 def get_supplier(infile, format='smiles'): |
| 10 """ | 12 """ |
| 11 Returns a generator over a SMILES or InChI file. Every element is of RDKit | 13 Returns a generator over a SMILES or InChI file. Every element is of RDKit |
| 12 molecule and has its original string as _Name property. | 14 molecule and has its original string as _Name property. |
| 13 """ | 15 """ |
| 14 with open(infile) as handle: | 16 with open(infile) as handle: |
| 15 for line in handle: | 17 for line in handle: |
| 16 line = line.strip() | 18 line = line.strip() |
| 17 if format == 'smiles': | 19 if format == 'smiles': |
| 18 mol = Chem.MolFromSmiles( line, sanitize=True ) | 20 mol = Chem.MolFromSmiles(line, sanitize=True) |
| 19 elif format == 'inchi': | 21 elif format == 'inchi': |
| 20 mol = Chem.inchi.MolFromInchi( line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False ) | 22 mol = Chem.inchi.MolFromInchi(line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False) |
| 21 if mol is None: | 23 if mol is None: |
| 22 yield False | 24 yield False |
| 23 else: | 25 else: |
| 24 mol.SetProp( '_Name', line.split('\t')[0] ) | 26 mol.SetProp('_Name', line.split('\t')[0]) |
| 25 yield mol | 27 yield mol |
| | 28 |
| 26 | 29 |
| 27 def get_rdkit_descriptor_functions(): | 30 def get_rdkit_descriptor_functions(): |
| 28 """ | 31 """ |
| 29 Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function) | 32 Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function) |
| 30 """ | 33 """ |
| 31 ret = [ (name, f) for name, f in inspect.getmembers( Descriptors ) if inspect.isfunction( f ) and not name.startswith( '_' ) ] | 34 ret = [(name, f) for name, f in inspect.getmembers(Descriptors) if inspect.isfunction(f) and not name.startswith('_')] |
| | 35 # some which are not in the official Descriptors module we need to add manually |
| | 36 ret.extend([('FormalCharge', Chem.GetFormalCharge), ('SSSR', Chem.GetSSSR)]) |
| 32 ret.sort() | 37 ret.sort() |
| 33 return ret | 38 return ret |
| 34 | 39 |
| 35 | 40 |
| 36 def descriptors( mol, functions ): | 41 def descriptors(mol, functions): |
| 37 """ | 42 """ |
| 38 Calculates the descriptors of a given molecule. | 43 Calculates the descriptors of a given molecule. |
| 39 """ | 44 """ |
| 40 for name, function in functions: | 45 for name, function in functions: |
| 41 yield (name, function( mol )) | 46 yield (name, function(mol)) |
| 42 | 47 |
| 43 | 48 |
| 44 if __name__ == "__main__": | 49 if __name__ == "__main__": |
| 45 parser = argparse.ArgumentParser() | 50 parser = argparse.ArgumentParser() |
| 46 parser.add_argument('-i', '--infile', required=True, help='Path to the input file.') | 51 parser.add_argument('-i', '--infile', required=True, help='Path to the input file.') |
| 47 parser.add_argument("--iformat", help="Specify the input file format.") | 52 parser.add_argument("--iformat", help="Specify the input file format.") |
| 48 | 53 |
| 49 parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), | 54 parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), |
| 50 default=sys.stdout, help="path to the result file, default it sdtout") | 55 default=sys.stdout, |
| | 56 help="path to the result file, default is stdout") |
| | 57 |
| | 58 parser.add_argument('-s', '--select', default=None, |
| | 59 help="select a subset of comma-separated descriptors to use") |
| 51 | 60 |
| 52 parser.add_argument("--header", dest="header", action="store_true", | 61 parser.add_argument("--header", dest="header", action="store_true", |
| 53 default=False, | 62 default=False, |
| 54 help="Write header line.") | 63 help="Write header line.") |
| 55 | 64 |
| 56 args = parser.parse_args() | 65 args = parser.parse_args() |
| 57 | 66 |
| 58 if args.iformat == 'sdf': | 67 if args.iformat == 'sdf': |
| 59 supplier = Chem.SDMolSupplier( args.infile ) | 68 supplier = Chem.SDMolSupplier(args.infile) |
| 60 elif args.iformat =='smi': | 69 elif args.iformat == 'smi': |
| 61 supplier = get_supplier( args.infile, format = 'smiles' ) | 70 supplier = get_supplier(args.infile, format='smiles') |
| 62 elif args.iformat == 'inchi': | 71 elif args.iformat == 'inchi': |
| 63 supplier = get_supplier( args.infile, format = 'inchi' ) | 72 supplier = get_supplier(args.infile, format='inchi') |
| | 73 elif args.iformat == 'pdb': |
| | 74 supplier = [Chem.MolFromPDBFile(args.infile)] |
| | 75 elif args.iformat == 'mol2': |
| | 76 supplier = [Chem.MolFromMol2File(args.infile)] |
| 64 | 77 |
| 65 functions = get_rdkit_descriptor_functions() | 78 functions = get_rdkit_descriptor_functions() |
| | 79 if args.select and args.select != 'None': |
| | 80 selected = args.select.split(',') |
| | 81 functions = [(name, f) for name, f in functions if name in selected] |
| 66 | 82 |
| 67 if args.header: | 83 if args.header: |
| 68 args.outfile.write( '%s\n' % '\t'.join( ['MoleculeID'] + [name for name, f in functions] ) ) | 84 args.outfile.write('%s\n' % '\t'.join(['MoleculeID'] + [name for name, f in functions])) |
| 69 | 85 |
| 70 for mol in supplier: | 86 for mol in supplier: |
| 71 if not mol: | 87 if not mol: |
| 72 continue | 88 continue |
| 73 descs = descriptors( mol, functions ) | 89 descs = descriptors(mol, functions) |
| 74 molecule_id = mol.GetProp("_Name") | 90 try: |
| 75 args.outfile.write( "%s\n" % '\t'.join( [molecule_id]+ [str(round(res, 6)) for name, res in descs] ) ) | 91 molecule_id = mol.GetProp("_Name") |
| 76 | 92 except KeyError: |
| | 93 molecule_id = Chem.MolToSmiles(mol) |
| | 94 args.outfile.write("%s\n" % '\t'.join([molecule_id] + [str(round(res, 6)) for name, res in descs])) |
