Mercurial > repos > bgruening > openbabel_remduplicates
comparison subsearch.py @ 13:d44de092fef3 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 1fe240ef0064a1a4a66d9be1ccace53824280b75"
| author | bgruening | 
|---|---|
| date | Mon, 19 Oct 2020 14:36:22 +0000 | 
| parents | 8c4a4e9e173c | 
| children | f3099132512d | 
   comparison
  equal
  deleted
  inserted
  replaced
| 12:8c4a4e9e173c | 13:d44de092fef3 | 
|---|---|
| 2 """ | 2 """ | 
| 3 Input: Molecules in SDF, SMILES ... | 3 Input: Molecules in SDF, SMILES ... | 
| 4 Output: Molecules filtered with specified substructures. | 4 Output: Molecules filtered with specified substructures. | 
| 5 Copyright 2013, Bjoern Gruening and Xavier Lucas | 5 Copyright 2013, Bjoern Gruening and Xavier Lucas | 
| 6 """ | 6 """ | 
| 7 import sys, os | |
| 8 import argparse | 7 import argparse | 
| 9 import multiprocessing | 8 import multiprocessing | 
| 9 import os | |
| 10 import shutil | |
| 11 import subprocess | |
| 12 import sys | |
| 10 import tempfile | 13 import tempfile | 
| 11 import subprocess | |
| 12 import shutil | |
| 13 | 14 | 
| 14 from openbabel import openbabel, pybel | 15 from openbabel import openbabel, pybel | 
| 15 openbabel.obErrorLog.StopLogging() | 16 openbabel.obErrorLog.StopLogging() | 
| 17 | |
| 16 | 18 | 
| 17 def parse_command_line(): | 19 def parse_command_line(): | 
| 18 parser = argparse.ArgumentParser() | 20 parser = argparse.ArgumentParser() | 
| 19 parser.add_argument('-i', '--infile', required=True, help='Molecule file.') | 21 parser.add_argument('-i', '--infile', required=True, help='Molecule file.') | 
| 20 parser.add_argument('--iformat', help='Input format.') | 22 parser.add_argument('--iformat', help='Input format.') | 
| 21 parser.add_argument('--fastsearch-index', dest="fastsearch_index", | 23 parser.add_argument('--fastsearch-index', dest="fastsearch_index", required=True, | 
| 22 required=True, help='Path to the openbabel fastsearch index.') | 24 help='Path to the openbabel fastsearch index.') | 
| 23 parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') | 25 parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') | 
| 24 parser.add_argument('--oformat', | 26 parser.add_argument('--oformat', default='smi', help='Output file format') | 
| 25 default='smi', help='Output file format') | 27 parser.add_argument("--max-candidates", dest="max_candidates", type=int, default=4000, | 
| 26 parser.add_argument("--max-candidates", dest="max_candidates", type=int, | 28 help="The maximum number of candidates.") | 
| 27 default=4000, help="The maximum number of candidates.") | 29 parser.add_argument('-p', '--processors', type=int, | 
| 28 parser.add_argument('-p', '--processors', type=int, | 30 default=multiprocessing.cpu_count()) | 
| 29 default=multiprocessing.cpu_count()) | |
| 30 return parser.parse_args() | 31 return parser.parse_args() | 
| 31 | 32 | 
| 33 | |
| 32 results = list() | 34 results = list() | 
| 35 | |
| 36 | |
| 33 def mp_callback(res): | 37 def mp_callback(res): | 
| 34 results.append(res) | 38 results.append(res) | 
| 35 | 39 | 
| 36 def mp_helper( query, args ): | 40 | 
| 41 def mp_helper(query, args): | |
| 37 """ | 42 """ | 
| 38 Helper function for multiprocessing. | 43 Helper function for multiprocessing. | 
| 39 This function is a wrapper around the following command: | 44 This function is a wrapper around the following command: | 
| 40 obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999 | 45 obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999 | 
| 41 """ | 46 """ | 
| 46 opts = '-o%s' % args.oformat | 51 opts = '-o%s' % args.oformat | 
| 47 | 52 | 
| 48 tmp = tempfile.NamedTemporaryFile(delete=False) | 53 tmp = tempfile.NamedTemporaryFile(delete=False) | 
| 49 cmd = 'obabel -ifs %s -O %s %s -s%s -al %s' % (args.fastsearch_index, tmp.name, opts, query, args.max_candidates) | 54 cmd = 'obabel -ifs %s -O %s %s -s%s -al %s' % (args.fastsearch_index, tmp.name, opts, query, args.max_candidates) | 
| 50 | 55 | 
| 51 child = subprocess.Popen(cmd.split(), | 56 child = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 
| 52 stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
| 53 | 57 | 
| 54 stdout, stderr = child.communicate() | 58 stdout, stderr = child.communicate() | 
| 55 return_code = child.returncode | 59 return_code = child.returncode | 
| 56 | 60 | 
| 57 if return_code: | 61 if return_code: | 
| 63 sys.stdout.write(stdout) | 67 sys.stdout.write(stdout) | 
| 64 sys.stdout.write(stderr) | 68 sys.stdout.write(stderr) | 
| 65 return (tmp.name, query) | 69 return (tmp.name, query) | 
| 66 | 70 | 
| 67 | 71 | 
| 68 def get_smiles_or_smarts( args ): | 72 def get_smiles_or_smarts(args): | 
| 69 """ | 73 """ | 
| 70 Wrapper to retrieve a stripped SMILES or SMARTS string from different input formats. | 74 Wrapper to retrieve a stripped SMILES or SMARTS string from different input formats. | 
| 71 """ | 75 """ | 
| 72 if args.iformat in ['smi', 'text', 'tabular']: | 76 if args.iformat in ['smi', 'text', 'tabular']: | 
| 73 with open( args.infile ) as text_file: | 77 with open(args.infile) as text_file: | 
| 74 for line in text_file: | 78 for line in text_file: | 
| 75 yield line.split('\t')[0].strip() | 79 yield line.split('\t')[0].strip() | 
| 76 else: | 80 else: | 
| 77 # inchi or sdf files | 81 # inchi or sdf files | 
| 78 for mol in pybel.readfile( args.iformat, args.infile ): | 82 for mol in pybel.readfile(args.iformat, args.infile): | 
| 79 yield mol.write('smiles').split('\t')[0] | 83 yield mol.write('smiles').split('\t')[0] | 
| 80 | 84 | 
| 81 def substructure_search( args ): | |
| 82 | 85 | 
| 83 pool = multiprocessing.Pool( args.processors ) | 86 def substructure_search(args): | 
| 84 for query in get_smiles_or_smarts( args ): | 87 pool = multiprocessing.Pool(args.processors) | 
| 88 for query in get_smiles_or_smarts(args): | |
| 85 pool.apply_async(mp_helper, args=(query, args), callback=mp_callback) | 89 pool.apply_async(mp_helper, args=(query, args), callback=mp_callback) | 
| 86 #mp_callback( mp_helper(query, args) ) | 90 # mp_callback(mp_helper(query, args)) | 
| 87 pool.close() | 91 pool.close() | 
| 88 pool.join() | 92 pool.join() | 
| 89 | 93 | 
| 90 if args.oformat == 'names': | 94 if args.oformat == 'names': | 
| 91 out_handle = open( args.outfile, 'w' ) | 95 out_handle = open(args.outfile, 'w') | 
| 92 for result_file, query in results: | 96 for result_file, query in results: | 
| 93 with open(result_file) as res_handle: | 97 with open(result_file) as res_handle: | 
| 94 for line in res_handle: | 98 for line in res_handle: | 
| 95 out_handle.write('%s\t%s\n' % ( line.strip(), query )) | 99 out_handle.write('%s\t%s\n' % (line.strip(), query)) | 
| 96 os.remove( result_file ) | 100 os.remove(result_file) | 
| 97 out_handle.close() | 101 out_handle.close() | 
| 98 else: | 102 else: | 
| 99 out_handle = open( args.outfile, 'wb' ) | 103 out_handle = open(args.outfile, 'wb') | 
| 100 for result_file, query in results: | 104 for result_file, query in results: | 
| 101 res_handle = open(result_file,'rb') | 105 res_handle = open(result_file, 'rb') | 
| 102 shutil.copyfileobj( res_handle, out_handle ) | 106 shutil.copyfileobj(res_handle, out_handle) | 
| 103 res_handle.close() | 107 res_handle.close() | 
| 104 os.remove( result_file ) | 108 os.remove(result_file) | 
| 105 out_handle.close() | 109 out_handle.close() | 
| 106 | 110 | 
| 107 | 111 | 
| 108 def __main__(): | 112 def __main__(): | 
| 109 """ | 113 """ | 
| 110 Multiprocessing Open Babel Substructure Search. | 114 Multiprocessing Open Babel Substructure Search. | 
| 111 """ | 115 """ | 
| 112 args = parse_command_line() | 116 args = parse_command_line() | 
| 113 substructure_search( args ) | 117 substructure_search(args) | 
| 114 | 118 | 
| 115 if __name__ == "__main__" : | 119 | 
| 120 if __name__ == "__main__": | |
| 116 __main__() | 121 __main__() | 
