Mercurial > repos > bgruening > openbabel_remduplicates
comparison subsearch.py @ 15:f3099132512d draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit d9c51279c061a1da948a2582d5b502ca7573adbf
| author | bgruening |
|---|---|
| date | Thu, 15 Aug 2024 10:54:17 +0000 |
| parents | d44de092fef3 |
| children | |
Comparison legend:
- equal
- deleted
- inserted
- replaced
| 14:89e8077589f2 | 15:f3099132512d |
|---|---|
| 11 import subprocess | 11 import subprocess |
| 12 import sys | 12 import sys |
| 13 import tempfile | 13 import tempfile |
| 14 | 14 |
| 15 from openbabel import openbabel, pybel | 15 from openbabel import openbabel, pybel |
| 16 | |
| 16 openbabel.obErrorLog.StopLogging() | 17 openbabel.obErrorLog.StopLogging() |
| 17 | 18 |
| 18 | 19 |
| 19 def parse_command_line(): | 20 def parse_command_line(): |
| 20 parser = argparse.ArgumentParser() | 21 parser = argparse.ArgumentParser() |
| 21 parser.add_argument('-i', '--infile', required=True, help='Molecule file.') | 22 parser.add_argument("-i", "--infile", required=True, help="Molecule file.") |
| 22 parser.add_argument('--iformat', help='Input format.') | 23 parser.add_argument("--iformat", help="Input format.") |
| 23 parser.add_argument('--fastsearch-index', dest="fastsearch_index", required=True, | 24 parser.add_argument( |
| 24 help='Path to the openbabel fastsearch index.') | 25 "--fastsearch-index", |
| 25 parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') | 26 dest="fastsearch_index", |
| 26 parser.add_argument('--oformat', default='smi', help='Output file format') | 27 required=True, |
| 27 parser.add_argument("--max-candidates", dest="max_candidates", type=int, default=4000, | 28 help="Path to the openbabel fastsearch index.", |
| 28 help="The maximum number of candidates.") | 29 ) |
| 29 parser.add_argument('-p', '--processors', type=int, | 30 parser.add_argument( |
| 30 default=multiprocessing.cpu_count()) | 31 "-o", "--outfile", required=True, help="Path to the output file." |
| 32 ) | |
| 33 parser.add_argument("--oformat", default="smi", help="Output file format") | |
| 34 parser.add_argument( | |
| 35 "--max-candidates", | |
| 36 dest="max_candidates", | |
| 37 type=int, | |
| 38 default=4000, | |
| 39 help="The maximum number of candidates.", | |
| 40 ) | |
| 41 parser.add_argument( | |
| 42 "-p", "--processors", type=int, default=multiprocessing.cpu_count() | |
| 43 ) | |
| 31 return parser.parse_args() | 44 return parser.parse_args() |
| 32 | 45 |
| 33 | 46 |
| 34 results = list() | 47 results = list() |
| 35 | 48 |
| 38 results.append(res) | 51 results.append(res) |
| 39 | 52 |
| 40 | 53 |
| 41 def mp_helper(query, args): | 54 def mp_helper(query, args): |
| 42 """ | 55 """ |
| 43 Helper function for multiprocessing. | 56 Helper function for multiprocessing. |
| 44 That function is a wrapper around the following command: | 57 That function is a wrapper around the following command: |
| 45 obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999 | 58 obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999 |
| 46 """ | 59 """ |
| 47 | 60 |
| 48 if args.oformat == 'names': | 61 if args.oformat == "names": |
| 49 opts = '-osmi -xt' | 62 opts = "-osmi -xt" |
| 50 else: | 63 else: |
| 51 opts = '-o%s' % args.oformat | 64 opts = "-o%s" % args.oformat |
| 52 | 65 |
| 53 tmp = tempfile.NamedTemporaryFile(delete=False) | 66 tmp = tempfile.NamedTemporaryFile(delete=False) |
| 54 cmd = 'obabel -ifs %s -O %s %s -s%s -al %s' % (args.fastsearch_index, tmp.name, opts, query, args.max_candidates) | 67 cmd = "obabel -ifs %s -O %s %s -s%s -al %s" % ( |
| 68 args.fastsearch_index, | |
| 69 tmp.name, | |
| 70 opts, | |
| 71 query, | |
| 72 args.max_candidates, | |
| 73 ) | |
| 55 | 74 |
| 56 child = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 75 child = subprocess.Popen( |
| 76 cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE | |
| 77 ) | |
| 57 | 78 |
| 58 stdout, stderr = child.communicate() | 79 stdout, stderr = child.communicate() |
| 59 return_code = child.returncode | 80 return_code = child.returncode |
| 60 | 81 |
| 61 if return_code: | 82 if return_code: |
| 71 | 92 |
| 72 def get_smiles_or_smarts(args): | 93 def get_smiles_or_smarts(args): |
| 73 """ | 94 """ |
| 74 Wrapper to retrieve a striped SMILES or SMARTS string from different input formats. | 95 Wrapper to retrieve a striped SMILES or SMARTS string from different input formats. |
| 75 """ | 96 """ |
| 76 if args.iformat in ['smi', 'text', 'tabular']: | 97 if args.iformat in ["smi", "text", "tabular"]: |
| 77 with open(args.infile) as text_file: | 98 with open(args.infile) as text_file: |
| 78 for line in text_file: | 99 for line in text_file: |
| 79 yield line.split('\t')[0].strip() | 100 yield line.split("\t")[0].strip() |
| 80 else: | 101 else: |
| 81 # inchi or sdf files | 102 # inchi or sdf files |
| 82 for mol in pybel.readfile(args.iformat, args.infile): | 103 for mol in pybel.readfile(args.iformat, args.infile): |
| 83 yield mol.write('smiles').split('\t')[0] | 104 yield mol.write("smiles").split("\t")[0] |
| 84 | 105 |
| 85 | 106 |
| 86 def substructure_search(args): | 107 def substructure_search(args): |
| 87 pool = multiprocessing.Pool(args.processors) | 108 pool = multiprocessing.Pool(args.processors) |
| 88 for query in get_smiles_or_smarts(args): | 109 for query in get_smiles_or_smarts(args): |
| 89 pool.apply_async(mp_helper, args=(query, args), callback=mp_callback) | 110 pool.apply_async(mp_helper, args=(query, args), callback=mp_callback) |
| 90 # mp_callback(mp_helper(query, args)) | 111 # mp_callback(mp_helper(query, args)) |
| 91 pool.close() | 112 pool.close() |
| 92 pool.join() | 113 pool.join() |
| 93 | 114 |
| 94 if args.oformat == 'names': | 115 if args.oformat == "names": |
| 95 out_handle = open(args.outfile, 'w') | 116 out_handle = open(args.outfile, "w") |
| 96 for result_file, query in results: | 117 for result_file, query in results: |
| 97 with open(result_file) as res_handle: | 118 with open(result_file) as res_handle: |
| 98 for line in res_handle: | 119 for line in res_handle: |
| 99 out_handle.write('%s\t%s\n' % (line.strip(), query)) | 120 out_handle.write("%s\t%s\n" % (line.strip(), query)) |
| 100 os.remove(result_file) | 121 os.remove(result_file) |
| 101 out_handle.close() | 122 out_handle.close() |
| 102 else: | 123 else: |
| 103 out_handle = open(args.outfile, 'wb') | 124 out_handle = open(args.outfile, "wb") |
| 104 for result_file, query in results: | 125 for result_file, query in results: |
| 105 res_handle = open(result_file, 'rb') | 126 res_handle = open(result_file, "rb") |
| 106 shutil.copyfileobj(res_handle, out_handle) | 127 shutil.copyfileobj(res_handle, out_handle) |
| 107 res_handle.close() | 128 res_handle.close() |
| 108 os.remove(result_file) | 129 os.remove(result_file) |
| 109 out_handle.close() | 130 out_handle.close() |
| 110 | 131 |
| 111 | 132 |
| 112 def __main__(): | 133 def __main__(): |
| 113 """ | 134 """ |
| 114 Multiprocessing Open Babel Substructure Search. | 135 Multiprocessing Open Babel Substructure Search. |
| 115 """ | 136 """ |
| 116 args = parse_command_line() | 137 args = parse_command_line() |
| 117 substructure_search(args) | 138 substructure_search(args) |
| 118 | 139 |
| 119 | 140 |
