Mercurial > repos > bgruening > openbabel_remduplicates
comparison subsearch.py @ 15:f3099132512d draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit d9c51279c061a1da948a2582d5b502ca7573adbf
| author | bgruening | 
|---|---|
| date | Thu, 15 Aug 2024 10:54:17 +0000 | 
| parents | d44de092fef3 | 
| children | |
   comparison
  equal
  deleted
  inserted
  replaced
| 14:89e8077589f2 | 15:f3099132512d | 
|---|---|
| 11 import subprocess | 11 import subprocess | 
| 12 import sys | 12 import sys | 
| 13 import tempfile | 13 import tempfile | 
| 14 | 14 | 
| 15 from openbabel import openbabel, pybel | 15 from openbabel import openbabel, pybel | 
| 16 | |
| 16 openbabel.obErrorLog.StopLogging() | 17 openbabel.obErrorLog.StopLogging() | 
| 17 | 18 | 
| 18 | 19 | 
| 19 def parse_command_line(): | 20 def parse_command_line(): | 
| 20 parser = argparse.ArgumentParser() | 21 parser = argparse.ArgumentParser() | 
| 21 parser.add_argument('-i', '--infile', required=True, help='Molecule file.') | 22 parser.add_argument("-i", "--infile", required=True, help="Molecule file.") | 
| 22 parser.add_argument('--iformat', help='Input format.') | 23 parser.add_argument("--iformat", help="Input format.") | 
| 23 parser.add_argument('--fastsearch-index', dest="fastsearch_index", required=True, | 24 parser.add_argument( | 
| 24 help='Path to the openbabel fastsearch index.') | 25 "--fastsearch-index", | 
| 25 parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') | 26 dest="fastsearch_index", | 
| 26 parser.add_argument('--oformat', default='smi', help='Output file format') | 27 required=True, | 
| 27 parser.add_argument("--max-candidates", dest="max_candidates", type=int, default=4000, | 28 help="Path to the openbabel fastsearch index.", | 
| 28 help="The maximum number of candidates.") | 29 ) | 
| 29 parser.add_argument('-p', '--processors', type=int, | 30 parser.add_argument( | 
| 30 default=multiprocessing.cpu_count()) | 31 "-o", "--outfile", required=True, help="Path to the output file." | 
| 32 ) | |
| 33 parser.add_argument("--oformat", default="smi", help="Output file format") | |
| 34 parser.add_argument( | |
| 35 "--max-candidates", | |
| 36 dest="max_candidates", | |
| 37 type=int, | |
| 38 default=4000, | |
| 39 help="The maximum number of candidates.", | |
| 40 ) | |
| 41 parser.add_argument( | |
| 42 "-p", "--processors", type=int, default=multiprocessing.cpu_count() | |
| 43 ) | |
| 31 return parser.parse_args() | 44 return parser.parse_args() | 
| 32 | 45 | 
| 33 | 46 | 
| 34 results = list() | 47 results = list() | 
| 35 | 48 | 
| 38 results.append(res) | 51 results.append(res) | 
| 39 | 52 | 
| 40 | 53 | 
| 41 def mp_helper(query, args): | 54 def mp_helper(query, args): | 
| 42 """ | 55 """ | 
| 43 Helper function for multiprocessing. | 56 Helper function for multiprocessing. | 
| 44 That function is a wrapper around the following command: | 57 That function is a wrapper around the following command: | 
| 45 obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999 | 58 obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999 | 
| 46 """ | 59 """ | 
| 47 | 60 | 
| 48 if args.oformat == 'names': | 61 if args.oformat == "names": | 
| 49 opts = '-osmi -xt' | 62 opts = "-osmi -xt" | 
| 50 else: | 63 else: | 
| 51 opts = '-o%s' % args.oformat | 64 opts = "-o%s" % args.oformat | 
| 52 | 65 | 
| 53 tmp = tempfile.NamedTemporaryFile(delete=False) | 66 tmp = tempfile.NamedTemporaryFile(delete=False) | 
| 54 cmd = 'obabel -ifs %s -O %s %s -s%s -al %s' % (args.fastsearch_index, tmp.name, opts, query, args.max_candidates) | 67 cmd = "obabel -ifs %s -O %s %s -s%s -al %s" % ( | 
| 68 args.fastsearch_index, | |
| 69 tmp.name, | |
| 70 opts, | |
| 71 query, | |
| 72 args.max_candidates, | |
| 73 ) | |
| 55 | 74 | 
| 56 child = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 75 child = subprocess.Popen( | 
| 76 cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE | |
| 77 ) | |
| 57 | 78 | 
| 58 stdout, stderr = child.communicate() | 79 stdout, stderr = child.communicate() | 
| 59 return_code = child.returncode | 80 return_code = child.returncode | 
| 60 | 81 | 
| 61 if return_code: | 82 if return_code: | 
| 71 | 92 | 
| 72 def get_smiles_or_smarts(args): | 93 def get_smiles_or_smarts(args): | 
| 73 """ | 94 """ | 
| 74 Wrapper to retrieve a stripped SMILES or SMARTS string from different input formats. | 95 Wrapper to retrieve a stripped SMILES or SMARTS string from different input formats. | 
| 75 """ | 96 """ | 
| 76 if args.iformat in ['smi', 'text', 'tabular']: | 97 if args.iformat in ["smi", "text", "tabular"]: | 
| 77 with open(args.infile) as text_file: | 98 with open(args.infile) as text_file: | 
| 78 for line in text_file: | 99 for line in text_file: | 
| 79 yield line.split('\t')[0].strip() | 100 yield line.split("\t")[0].strip() | 
| 80 else: | 101 else: | 
| 81 # inchi or sdf files | 102 # inchi or sdf files | 
| 82 for mol in pybel.readfile(args.iformat, args.infile): | 103 for mol in pybel.readfile(args.iformat, args.infile): | 
| 83 yield mol.write('smiles').split('\t')[0] | 104 yield mol.write("smiles").split("\t")[0] | 
| 84 | 105 | 
| 85 | 106 | 
| 86 def substructure_search(args): | 107 def substructure_search(args): | 
| 87 pool = multiprocessing.Pool(args.processors) | 108 pool = multiprocessing.Pool(args.processors) | 
| 88 for query in get_smiles_or_smarts(args): | 109 for query in get_smiles_or_smarts(args): | 
| 89 pool.apply_async(mp_helper, args=(query, args), callback=mp_callback) | 110 pool.apply_async(mp_helper, args=(query, args), callback=mp_callback) | 
| 90 # mp_callback(mp_helper(query, args)) | 111 # mp_callback(mp_helper(query, args)) | 
| 91 pool.close() | 112 pool.close() | 
| 92 pool.join() | 113 pool.join() | 
| 93 | 114 | 
| 94 if args.oformat == 'names': | 115 if args.oformat == "names": | 
| 95 out_handle = open(args.outfile, 'w') | 116 out_handle = open(args.outfile, "w") | 
| 96 for result_file, query in results: | 117 for result_file, query in results: | 
| 97 with open(result_file) as res_handle: | 118 with open(result_file) as res_handle: | 
| 98 for line in res_handle: | 119 for line in res_handle: | 
| 99 out_handle.write('%s\t%s\n' % (line.strip(), query)) | 120 out_handle.write("%s\t%s\n" % (line.strip(), query)) | 
| 100 os.remove(result_file) | 121 os.remove(result_file) | 
| 101 out_handle.close() | 122 out_handle.close() | 
| 102 else: | 123 else: | 
| 103 out_handle = open(args.outfile, 'wb') | 124 out_handle = open(args.outfile, "wb") | 
| 104 for result_file, query in results: | 125 for result_file, query in results: | 
| 105 res_handle = open(result_file, 'rb') | 126 res_handle = open(result_file, "rb") | 
| 106 shutil.copyfileobj(res_handle, out_handle) | 127 shutil.copyfileobj(res_handle, out_handle) | 
| 107 res_handle.close() | 128 res_handle.close() | 
| 108 os.remove(result_file) | 129 os.remove(result_file) | 
| 109 out_handle.close() | 130 out_handle.close() | 
| 110 | 131 | 
| 111 | 132 | 
| 112 def __main__(): | 133 def __main__(): | 
| 113 """ | 134 """ | 
| 114 Multiprocessing Open Babel Substructure Search. | 135 Multiprocessing Open Babel Substructure Search. | 
| 115 """ | 136 """ | 
| 116 args = parse_command_line() | 137 args = parse_command_line() | 
| 117 substructure_search(args) | 138 substructure_search(args) | 
| 118 | 139 | 
| 119 | 140 | 
