Mercurial > repos > recetox > biotransformer
comparison wrapper_biotransformer.py @ 3:6080aee7c4f6 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 5cdd2628a1a509b3e0ccc599eaab63d664bf031a"
author | recetox |
---|---|
date | Wed, 13 Jan 2021 11:17:53 +0000 |
parents | 362a66a3889c |
children | 77f693bb14ac |
comparison
equal
deleted
inserted
replaced
2:3998017c374b | 3:6080aee7c4f6 |
---|---|
1 import subprocess | 1 import subprocess |
2 import sys | 2 import sys |
3 import tempfile | 3 import tempfile |
| | 4 import re |
| | 5 import pandas |
4 | 6 |
5 import pandas | 7 from openbabel import openbabel, pybel |
6 from openbabel import pybel | 8 openbabel.obErrorLog.StopLogging() |
7 | 9 |
8 | 10 |
9 # function for translating inchi to smiles | 11 # function for translating inchi to smiles |
10 def InchiToSmiles(df): | 12 def InchiToSmiles(df): |
11 sm = [] | 13 sm = [] |
44 smList1 = [] # list with smiles string | 46 smList1 = [] # list with smiles string |
45 smList2 = [] | 47 smList2 = [] |
46 smList3 = [] | 48 smList3 = [] |
47 for _, (smiles,) in in_df.iterrows(): | 49 for _, (smiles,) in in_df.iterrows(): |
48 with tempfile.NamedTemporaryFile() as out: | 50 with tempfile.NamedTemporaryFile() as out: |
49 subprocess.run(executable + argv + ["-ismi", smiles] + ["-ocsv", out.name]) | 51 print("Working on compound: " + smiles) |
50 tmp2 = pandas.read_csv(out.name) | 52 if not re.search(r'\.', smiles): |
51 tmp3 = pandas.read_csv(out.name) | 53 subprocess.run(executable + argv + ["-ismi", smiles] + ["-ocsv", out.name]) |
52 tmp2.drop_duplicates(inplace=True, subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"]) | 54 try: |
53 tmp3.drop_duplicates(inplace=True, subset=["Molecular formula", "Major Isotope Mass", "ALogP"]) | 55 tmp2 = pandas.read_csv(out.name) |
54 smList2.append([smiles] * tmp2.shape[0]) | 56 tmp3 = pandas.read_csv(out.name) |
55 smList3.append([smiles] * tmp3.shape[0]) | 57 tmp2.drop_duplicates(inplace=True, subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"]) |
56 out_df1 = pandas.concat([out_df1, pandas.read_csv(out.name)]) | 58 tmp3.drop_duplicates(inplace=True, subset=["Molecular formula", "Major Isotope Mass", "ALogP"]) |
57 out_df2 = pandas.concat([out_df2, tmp2]) | 59 smList2.append([smiles] * tmp2.shape[0]) |
58 out_df3 = pandas.concat([out_df3, tmp3]) | 60 smList3.append([smiles] * tmp3.shape[0]) |
59 smList1.append([smiles] * pandas.read_csv(out.name).shape[0]) | 61 out_df1 = pandas.concat([out_df1, pandas.read_csv(out.name)]) |
| | 62 out_df2 = pandas.concat([out_df2, tmp2]) |
| | 63 out_df3 = pandas.concat([out_df3, tmp3]) |
| | 64 smList1.append([smiles] * pandas.read_csv(out.name).shape[0]) |
| | 65 except pandas.errors.EmptyDataError: |
| | 66 continue |
| | 67 else: |
| | 68 print("ERROR: Input compound cannot be a mixture.") |
60 smList1 = sum(smList1, []) # merge sublists into one list | 69 smList1 = sum(smList1, []) # merge sublists into one list |
61 smList2 = sum(smList2, []) | 70 smList2 = sum(smList2, []) |
62 smList3 = sum(smList3, []) | 71 smList3 = sum(smList3, []) |
63 | 72 |
64 out_df1.insert(0, "SMILES query", smList1) | 73 out_df1.insert(0, "SMILES query", smList1) |