comparison wrapper_biotransformer.py @ 3:6080aee7c4f6 draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 5cdd2628a1a509b3e0ccc599eaab63d664bf031a"
author recetox
date Wed, 13 Jan 2021 11:17:53 +0000
parents 362a66a3889c
children 77f693bb14ac
comparison
equal deleted inserted replaced
2:3998017c374b 3:6080aee7c4f6
1 import subprocess 1 import subprocess
2 import sys 2 import sys
3 import tempfile 3 import tempfile
4 import re
5 import pandas
4 6
5 import pandas 7 from openbabel import openbabel, pybel
6 from openbabel import pybel 8 openbabel.obErrorLog.StopLogging()
7 9
8 10
9 # function for translating inchi to smiles 11 # function for translating inchi to smiles
10 def InchiToSmiles(df): 12 def InchiToSmiles(df):
11 sm = [] 13 sm = []
44 smList1 = [] # list with smiles string 46 smList1 = [] # list with smiles string
45 smList2 = [] 47 smList2 = []
46 smList3 = [] 48 smList3 = []
47 for _, (smiles,) in in_df.iterrows(): 49 for _, (smiles,) in in_df.iterrows():
48 with tempfile.NamedTemporaryFile() as out: 50 with tempfile.NamedTemporaryFile() as out:
49 subprocess.run(executable + argv + ["-ismi", smiles] + ["-ocsv", out.name]) 51 print("Working on compound: " + smiles)
50 tmp2 = pandas.read_csv(out.name) 52 if not re.search(r'\.', smiles):
51 tmp3 = pandas.read_csv(out.name) 53 subprocess.run(executable + argv + ["-ismi", smiles] + ["-ocsv", out.name])
52 tmp2.drop_duplicates(inplace=True, subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"]) 54 try:
53 tmp3.drop_duplicates(inplace=True, subset=["Molecular formula", "Major Isotope Mass", "ALogP"]) 55 tmp2 = pandas.read_csv(out.name)
54 smList2.append([smiles] * tmp2.shape[0]) 56 tmp3 = pandas.read_csv(out.name)
55 smList3.append([smiles] * tmp3.shape[0]) 57 tmp2.drop_duplicates(inplace=True, subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"])
56 out_df1 = pandas.concat([out_df1, pandas.read_csv(out.name)]) 58 tmp3.drop_duplicates(inplace=True, subset=["Molecular formula", "Major Isotope Mass", "ALogP"])
57 out_df2 = pandas.concat([out_df2, tmp2]) 59 smList2.append([smiles] * tmp2.shape[0])
58 out_df3 = pandas.concat([out_df3, tmp3]) 60 smList3.append([smiles] * tmp3.shape[0])
59 smList1.append([smiles] * pandas.read_csv(out.name).shape[0]) 61 out_df1 = pandas.concat([out_df1, pandas.read_csv(out.name)])
62 out_df2 = pandas.concat([out_df2, tmp2])
63 out_df3 = pandas.concat([out_df3, tmp3])
64 smList1.append([smiles] * pandas.read_csv(out.name).shape[0])
65 except pandas.errors.EmptyDataError:
66 continue
67 else:
68 print("ERROR: Input compound cannot be a mixture.")
60 smList1 = sum(smList1, []) # merge sublists into one list 69 smList1 = sum(smList1, []) # merge sublists into one list
61 smList2 = sum(smList2, []) 70 smList2 = sum(smList2, [])
62 smList3 = sum(smList3, []) 71 smList3 = sum(smList3, [])
63 72
64 out_df1.insert(0, "SMILES query", smList1) 73 out_df1.insert(0, "SMILES query", smList1)