annotate wrapper_biotransformer.py @ 7:c32d024d6d68 draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 038d752ab3a1eb53f9599f219da97faaa67bf08d
author recetox
date Fri, 23 Jun 2023 10:06:47 +0000
parents c0fe7ad30ade
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
c0fe7ad30ade planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 9b8e9941cdf0689518021bc0aa4b7196b28d25d7
recetox
parents: 4
diff changeset
1 import re
1
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
2 import subprocess
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
3 import sys
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
4 import tempfile
5
c0fe7ad30ade planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 9b8e9941cdf0689518021bc0aa4b7196b28d25d7
recetox
parents: 4
diff changeset
5
1
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
6 import pandas
3
6080aee7c4f6 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 5cdd2628a1a509b3e0ccc599eaab63d664bf031a"
recetox
parents: 1
diff changeset
7 from openbabel import openbabel, pybel
6080aee7c4f6 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 5cdd2628a1a509b3e0ccc599eaab63d664bf031a"
recetox
parents: 1
diff changeset
8 openbabel.obErrorLog.StopLogging()
1
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
9
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
10
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
def InchiToSmiles(df):
    '''Translate the InChI strings of a result table to SMILES.

    Parameters:
        df: table-like object whose ``'InChI'`` entry iterates over InChI
            strings (in this script, a pandas DataFrame of BioTransformer
            results).

    Returns:
        A list with one SMILES string per InChI, in the same order.
    '''
    # pybel's write("smi") returns the SMILES followed by a tab, the molecule
    # title (empty for molecules read from InChI) and a newline. Strip that
    # trailing whitespace so it does not leak into the tab-separated output
    # written by the caller.
    return [
        pybel.readstring("inchi", inchi).write("smi").strip()
        for inchi in df['InChI']
    ]
1
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
18
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
19
362a66a3889c "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
recetox
parents:
diff changeset
executable = ["biotransformer"]

# Column sets used to collapse duplicated predictions of one query compound.
DEDUP_COLUMNS_STRICT = ["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"]
DEDUP_COLUMNS_LOOSE = ["Molecular formula", "Major Isotope Mass", "ALogP"]


def _pop_option(argv, flag):
    '''Remove ``flag`` and the value following it from ``argv``; return the value.'''
    try:
        position = argv.index(flag)
        value = argv.pop(position + 1)
    except (ValueError, IndexError):
        # Either the flag is absent or nothing follows it.
        raise SystemExit("ERROR: missing required option " + flag + " <path>") from None
    del argv[position]
    return value


def _run_biotransformer(smiles, extra_args):
    '''Run BioTransformer for one SMILES string.

    Returns the CSV written by the tool as a DataFrame, or ``None`` when the
    tool left the output file empty.
    '''
    with tempfile.NamedTemporaryFile() as out:
        subprocess.run(executable + extra_args + ["-ismi", smiles] + ["-ocsv", out.name])
        try:
            return pandas.read_csv(out.name)
        except pandas.errors.EmptyDataError:
            return None


def _assemble(frames, queries):
    '''Concatenate per-compound tables and prepend the two SMILES columns.'''
    if not frames:
        # No compound produced any result: emit a header-only table instead
        # of failing on the missing 'InChI' column.
        return pandas.DataFrame(columns=["SMILES query", "SMILES target"])
    table = pandas.concat(frames)
    table.insert(0, "SMILES query", queries)
    table.insert(1, "SMILES target", InchiToSmiles(table))
    return table


def main(argv=None):
    '''Run BioTransformer on every SMILES of the input CSV and write three tables.

    The options ``-icsv``, ``-ocsv``, ``-ocsvDup`` and ``-ocsvDup2`` (each
    followed by a path) are consumed here; every remaining argument is passed
    to the ``biotransformer`` executable unchanged.

    Parameters:
        argv: argument list; defaults to ``sys.argv[1:]``.

    Outputs (tab-separated):
        -ocsv      all results
        -ocsvDup   results de-duplicated per compound on six columns
        -ocsvDup2  results de-duplicated per compound on three columns
    '''
    argv = list(sys.argv[1:] if argv is None else argv)
    icsv = _pop_option(argv, "-icsv")
    ocsv = _pop_option(argv, "-ocsv")
    ocsv_dup = _pop_option(argv, "-ocsvDup")
    ocsv_dup2 = _pop_option(argv, "-ocsvDup2")

    # The input has no header row; each row is unpacked as a single SMILES value.
    in_df = pandas.read_csv(icsv, header=None)

    # One (frames, query labels) accumulator per output table. Collecting and
    # concatenating once avoids re-copying the growing table per compound.
    all_frames, strict_frames, loose_frames = [], [], []
    all_queries, strict_queries, loose_queries = [], [], []

    for _, (smiles,) in in_df.iterrows():
        print("Working on compound: " + smiles)
        if "." in smiles:
            # A dot separates disconnected components in SMILES.
            print("ERROR: Input compound cannot be a mixture.")
            continue

        bio_out = _run_biotransformer(smiles, argv)
        if bio_out is None:
            continue

        strict = bio_out.drop_duplicates(subset=DEDUP_COLUMNS_STRICT)
        loose = bio_out.drop_duplicates(subset=DEDUP_COLUMNS_LOOSE)
        for frame, frames, queries in (
            (bio_out, all_frames, all_queries),
            (strict, strict_frames, strict_queries),
            (loose, loose_frames, loose_queries),
        ):
            frames.append(frame)
            # Label every result row with the compound it was derived from.
            queries.extend([smiles] * frame.shape[0])

    _assemble(all_frames, all_queries).to_csv(ocsv, sep='\t')
    _assemble(strict_frames, strict_queries).to_csv(ocsv_dup, sep='\t')
    _assemble(loose_frames, loose_queries).to_csv(ocsv_dup2, sep='\t')


if __name__ == "__main__":
    main()