comparison wrapper_biotransformer.py @ 4:77f693bb14ac draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 9a2276670c6ebf147ccd2cdd1cc54b306af3d20c"
author recetox
date Mon, 11 Apr 2022 10:09:39 +0000
parents 6080aee7c4f6
children c0fe7ad30ade
comparison
equal deleted inserted replaced
3:6080aee7c4f6 4:77f693bb14ac
6 6
7 from openbabel import openbabel, pybel 7 from openbabel import openbabel, pybel
8 openbabel.obErrorLog.StopLogging() 8 openbabel.obErrorLog.StopLogging()
9 9
10 10
11 # function for translating inchi to smiles
12 def InchiToSmiles(df): 11 def InchiToSmiles(df):
12 '''Translate inchi to smiles'''
13 sm = [] 13 sm = []
14 for item in df['InChI']: 14 for item in df['InChI']:
15 tmp = pybel.readstring("inchi", item) 15 tmp = pybel.readstring("inchi", item)
16 sm.append(tmp.write("smi")) 16 sm.append(tmp.write("smi"))
17 return(sm) 17 return(sm)
18 18
19 19
20 executable = ["biotransformer"] 20 executable = ["biotransformer"]
21 # executable_r = ["Rscript", "inchi_to_smiles.r"]
22 21
23 argv = sys.argv[1:] 22 argv = sys.argv[1:]
24 if "-icsv" in argv: 23 icsv = argv.pop(argv.index("-icsv") + 1)
25 icsv = argv.pop(argv.index("-icsv") + 1) 24 argv.remove("-icsv")
26 argv.remove("-icsv") 25 ocsv = argv.pop(argv.index("-ocsv") + 1)
26 argv.remove("-ocsv")
27 ocsv_dup = argv.pop(argv.index("-ocsvDup") + 1)
28 argv.remove("-ocsvDup")
29 ocsv_dup2 = argv.pop(argv.index("-ocsvDup2") + 1)
30 argv.remove("-ocsvDup2")
27 31
28 if "-ocsv" not in argv: 32 in_df = pandas.read_csv(icsv, header=None)
29 sys.stderr.write("excpected -ocsv parameter\n") 33 out_df1 = pandas.DataFrame() # all results
30 sys.exit(1) 34 out_df2 = pandas.DataFrame() # filtered results based on 6 columns
31 ocsv = argv.pop(argv.index("-ocsv") + 1) 35 out_df3 = pandas.DataFrame() # filtered results based on 3 columns
32 argv.remove("-ocsv")
33 ocsv_dup = argv.pop(argv.index("-ocsvDup") + 1)
34 argv.remove("-ocsvDup")
35 ocsv_dup2 = argv.pop(argv.index("-ocsvDup2") + 1)
36 argv.remove("-ocsvDup2")
37 36
38 in_df = pandas.read_csv(icsv, header=None) 37 smList1 = [] # list with smiles string
39 out_df1 = pandas.DataFrame() # all results 38 smList2 = []
40 out_df2 = pandas.DataFrame() # filtered results based on 6 columns 39 smList3 = []
41 out_df3 = pandas.DataFrame() # filtered results based on 3 columns 40 for _, (smiles,) in in_df.iterrows():
41 with tempfile.NamedTemporaryFile() as out:
42 print("Working on compound: " + smiles)
43 if not re.search(r'\.', smiles):
44 subprocess.run(executable + argv + ["-ismi", smiles] + ["-ocsv", out.name])
45 try:
46 bio_out = pandas.read_csv(out.name)
47 tmp2 = bio_out.drop_duplicates(subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"])
48 tmp3 = bio_out.drop_duplicates(subset=["Molecular formula", "Major Isotope Mass", "ALogP"])
42 49
43 tmp2 = pandas.DataFrame() 50 smList1.append([smiles] * bio_out.shape[0])
44 tmp3 = pandas.DataFrame() 51 smList2.append([smiles] * tmp2.shape[0])
52 smList3.append([smiles] * tmp3.shape[0])
45 53
46 smList1 = [] # list with smiles string 54 out_df1 = pandas.concat([out_df1, bio_out])
47 smList2 = [] 55 out_df2 = pandas.concat([out_df2, tmp2])
48 smList3 = [] 56 out_df3 = pandas.concat([out_df3, tmp3])
49 for _, (smiles,) in in_df.iterrows(): 57 except pandas.errors.EmptyDataError:
50 with tempfile.NamedTemporaryFile() as out: 58 continue
51 print("Working on compound: " + smiles) 59 else:
52 if not re.search(r'\.', smiles): 60 print("ERROR: Input compound cannot be a mixture.")
53 subprocess.run(executable + argv + ["-ismi", smiles] + ["-ocsv", out.name]) 61 smList1 = sum(smList1, []) # merge sublists into one list
54 try: 62 smList2 = sum(smList2, [])
55 tmp2 = pandas.read_csv(out.name) 63 smList3 = sum(smList3, [])
56 tmp3 = pandas.read_csv(out.name)
57 tmp2.drop_duplicates(inplace=True, subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"])
58 tmp3.drop_duplicates(inplace=True, subset=["Molecular formula", "Major Isotope Mass", "ALogP"])
59 smList2.append([smiles] * tmp2.shape[0])
60 smList3.append([smiles] * tmp3.shape[0])
61 out_df1 = pandas.concat([out_df1, pandas.read_csv(out.name)])
62 out_df2 = pandas.concat([out_df2, tmp2])
63 out_df3 = pandas.concat([out_df3, tmp3])
64 smList1.append([smiles] * pandas.read_csv(out.name).shape[0])
65 except pandas.errors.EmptyDataError:
66 continue
67 else:
68 print("ERROR: Input compound cannot be a mixture.")
69 smList1 = sum(smList1, []) # merge sublists into one list
70 smList2 = sum(smList2, [])
71 smList3 = sum(smList3, [])
72 64
73 out_df1.insert(0, "SMILES query", smList1) 65 out_df1.insert(0, "SMILES query", smList1)
74 out_df1.drop_duplicates(inplace=True) 66 out_df1.insert(1, "SMILES target", InchiToSmiles(out_df1))
75 out_df1.insert(1, "SMILES target", InchiToSmiles(out_df1)) 67 out_df1.to_csv(ocsv, sep ='\t')
76 out_df1.to_csv(ocsv)
77 68
78 out_df2.insert(0, "SMILES query", smList2) 69 out_df2.insert(0, "SMILES query", smList2)
79 out_df3.insert(0, "SMILES query", smList3) 70 out_df2.insert(1, "SMILES target", InchiToSmiles(out_df2))
80 out_df2.drop_duplicates(inplace=True) 71 out_df2.to_csv(ocsv_dup, sep ='\t')
81 out_df3.drop_duplicates(inplace=True) 72
82 out_df2.insert(1, "SMILES target", InchiToSmiles(out_df2)) 73 out_df3.insert(0, "SMILES query", smList3)
83 out_df3.insert(1, "SMILES target", InchiToSmiles(out_df3)) 74 out_df3.insert(1, "SMILES target", InchiToSmiles(out_df3))
84 # out_df.drop_duplicates(inplace=True, subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"]) 75 out_df3.to_csv(ocsv_dup2, sep ='\t')
85 out_df2.to_csv(ocsv_dup)
86 out_df3.to_csv(ocsv_dup2)
87 else:
88 # code = subprocess.run(executable + argv).returncode
89 # sys.exit(code)
90 subprocess.run(executable + argv)
91 smile = argv.pop(argv.index("-ismi") + 1)
92 tmp = pandas.DataFrame()
93 out = argv.pop(argv.index("-ocsv") + 1)
94 tmp = pandas.read_csv(out) # reads created output file
95 tmp.insert(0, "SMILES query", smile) # add SMILES string for query
96 tmp.insert(1, "SMILES target", InchiToSmiles(tmp)) # add SMILES string for target
97 tmp.to_csv(out)