annotate wrapper_biotransformer.py @ 1:362a66a3889c draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
author recetox
date Tue, 22 Sep 2020 14:42:15 +0000
parents
children 6080aee7c4f6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
1 import subprocess
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
2 import sys
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
3 import tempfile
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
4
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
5 import pandas
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
6 from openbabel import pybel
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
7
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
8
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
9 # function for translating inchi to smiles
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
def InchiToSmiles(df):
    """Translate the InChI strings of a result table into SMILES strings.

    Args:
        df: table with an ``InChI`` column (a KeyError is raised when the
            column is missing).

    Returns:
        A list with one SMILES string per row of ``df``, in row order.
    """
    # Open Babel's "smi" writer emits "<SMILES>\t<title>\n". The surrounding
    # whitespace is stripped so the value stored in the CSV cell is the bare
    # SMILES string rather than one with an embedded tab and line break.
    return [
        pybel.readstring("inchi", inchi).write("smi").strip()
        for inchi in df["InChI"]
    ]
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
16
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
17
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
executable = ["biotransformer"]


def _fail(message):
    """Write ``message`` to stderr and terminate with exit status 1."""
    sys.stderr.write(message + "\n")
    sys.exit(1)


def _pop_option(argv, flag):
    """Remove ``flag`` and the value following it from ``argv``.

    Args:
        argv: mutable list of command line arguments; modified in place.
        flag: option name, e.g. ``"-ocsv"``.

    Returns:
        The value that followed ``flag``.

    Exits with status 1 (after an explanatory message on stderr) when the
    flag is absent or is the last argument and therefore has no value.
    """
    if flag not in argv:
        _fail("expected %s parameter" % flag)
    position = argv.index(flag)
    if position + 1 >= len(argv):
        _fail("expected a value after %s" % flag)
    value = argv.pop(position + 1)
    argv.pop(position)
    return value


def _write_results(frames, queries, path):
    """Merge per-compound result tables, annotate them and write a CSV.

    Args:
        frames: list of DataFrames, one per processed compound.
        queries: flat list holding the query SMILES for every row of the
            concatenated ``frames``.
        path: destination CSV file.
    """
    if frames:
        merged = pandas.concat(frames)
        merged.insert(0, "SMILES query", queries)
    else:
        # No compound produced any output: emit a header-only table.
        merged = pandas.DataFrame(columns=["SMILES query"])
    merged.drop_duplicates(inplace=True)
    if "InChI" in merged.columns:
        merged.insert(1, "SMILES target", InchiToSmiles(merged))
    merged.to_csv(path)


def _run_batch(argv):
    """Run BioTransformer once per SMILES listed in the ``-icsv`` file.

    Three CSV files are written:
      * ``-ocsv``     all results,
      * ``-ocsvDup``  results de-duplicated per compound on six columns,
      * ``-ocsvDup2`` results de-duplicated per compound on three columns.

    All remaining arguments are forwarded unchanged to BioTransformer.
    """
    icsv = _pop_option(argv, "-icsv")
    ocsv = _pop_option(argv, "-ocsv")
    ocsv_dup = _pop_option(argv, "-ocsvDup")
    ocsv_dup2 = _pop_option(argv, "-ocsvDup2")

    six_columns = ["InChI", "InChIKey", "Synonyms", "Molecular formula",
                   "Major Isotope Mass", "ALogP"]
    three_columns = ["Molecular formula", "Major Isotope Mass", "ALogP"]

    in_df = pandas.read_csv(icsv, header=None)

    frames_all, frames_six, frames_three = [], [], []
    queries_all, queries_six, queries_three = [], [], []

    # The input is expected to have exactly one column holding a SMILES.
    for _, (smiles,) in in_df.iterrows():
        with tempfile.NamedTemporaryFile() as out:
            subprocess.run(
                executable + argv + ["-ismi", smiles] + ["-ocsv", out.name]
            )
            try:
                # Parse the output once and derive the filtered views from it.
                result = pandas.read_csv(out.name)
            except pandas.errors.EmptyDataError:
                # Nothing was written for this compound; keep processing the
                # remaining ones instead of aborting the whole batch.
                sys.stderr.write(
                    "no BioTransformer output for %s, skipping\n" % smiles
                )
                continue

        by_six = result.drop_duplicates(subset=six_columns)
        by_three = result.drop_duplicates(subset=three_columns)

        frames_all.append(result)
        frames_six.append(by_six)
        frames_three.append(by_three)
        queries_all.extend([smiles] * result.shape[0])
        queries_six.extend([smiles] * by_six.shape[0])
        queries_three.extend([smiles] * by_three.shape[0])

    _write_results(frames_all, queries_all, ocsv)
    _write_results(frames_six, queries_six, ocsv_dup)
    _write_results(frames_three, queries_three, ocsv_dup2)


def _run_single(argv):
    """Run BioTransformer for a single ``-ismi`` query and annotate its CSV.

    BioTransformer is invoked with the arguments exactly as given; the file
    named by ``-ocsv`` is then re-read, extended with the query and target
    SMILES columns and written back in place.
    """
    subprocess.run(executable + argv)
    smile = _pop_option(argv, "-ismi")
    out = _pop_option(argv, "-ocsv")
    result = pandas.read_csv(out)  # reads created output file
    result.insert(0, "SMILES query", smile)  # add SMILES string for query
    # add SMILES string for target
    result.insert(1, "SMILES target", InchiToSmiles(result))
    result.to_csv(out)


def main(argv=None):
    """Entry point of the BioTransformer wrapper.

    Args:
        argv: command line arguments without the program name; defaults to
            ``sys.argv[1:]``. The list passed in is not modified.
    """
    argv = sys.argv[1:] if argv is None else list(argv)
    if "-icsv" in argv:
        _run_batch(argv)
    else:
        _run_single(argv)


if __name__ == "__main__":
    main()