Mercurial > repos > recetox > biotransformer
comparison wrapper_biotransformer.py @ 4:77f693bb14ac draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 9a2276670c6ebf147ccd2cdd1cc54b306af3d20c"
author | recetox |
---|---|
date | Mon, 11 Apr 2022 10:09:39 +0000 |
parents | 6080aee7c4f6 |
children | c0fe7ad30ade |
comparison
equal
deleted
inserted
replaced
3:6080aee7c4f6 | 4:77f693bb14ac |
---|---|
6 | 6 |
7 from openbabel import openbabel, pybel | 7 from openbabel import openbabel, pybel |
8 openbabel.obErrorLog.StopLogging() | 8 openbabel.obErrorLog.StopLogging() |
9 | 9 |
10 | 10 |
11 # function for translating inchi to smiles | |
12 def InchiToSmiles(df): | 11 def InchiToSmiles(df): |
12 '''Translate inchi to smiles''' | |
13 sm = [] | 13 sm = [] |
14 for item in df['InChI']: | 14 for item in df['InChI']: |
15 tmp = pybel.readstring("inchi", item) | 15 tmp = pybel.readstring("inchi", item) |
16 sm.append(tmp.write("smi")) | 16 sm.append(tmp.write("smi")) |
17 return(sm) | 17 return(sm) |
18 | 18 |
19 | 19 |
20 executable = ["biotransformer"] | 20 executable = ["biotransformer"] |
21 # executable_r = ["Rscript", "inchi_to_smiles.r"] | |
22 | 21 |
23 argv = sys.argv[1:] | 22 argv = sys.argv[1:] |
24 if "-icsv" in argv: | 23 icsv = argv.pop(argv.index("-icsv") + 1) |
25 icsv = argv.pop(argv.index("-icsv") + 1) | 24 argv.remove("-icsv") |
26 argv.remove("-icsv") | 25 ocsv = argv.pop(argv.index("-ocsv") + 1) |
26 argv.remove("-ocsv") | |
27 ocsv_dup = argv.pop(argv.index("-ocsvDup") + 1) | |
28 argv.remove("-ocsvDup") | |
29 ocsv_dup2 = argv.pop(argv.index("-ocsvDup2") + 1) | |
30 argv.remove("-ocsvDup2") | |
27 | 31 |
28 if "-ocsv" not in argv: | 32 in_df = pandas.read_csv(icsv, header=None) |
29 sys.stderr.write("excpected -ocsv parameter\n") | 33 out_df1 = pandas.DataFrame() # all results |
30 sys.exit(1) | 34 out_df2 = pandas.DataFrame() # filtered results based on 6 columns |
31 ocsv = argv.pop(argv.index("-ocsv") + 1) | 35 out_df3 = pandas.DataFrame() # filtered results based on 3 columns |
32 argv.remove("-ocsv") | |
33 ocsv_dup = argv.pop(argv.index("-ocsvDup") + 1) | |
34 argv.remove("-ocsvDup") | |
35 ocsv_dup2 = argv.pop(argv.index("-ocsvDup2") + 1) | |
36 argv.remove("-ocsvDup2") | |
37 | 36 |
38 in_df = pandas.read_csv(icsv, header=None) | 37 smList1 = [] # list with smiles string |
39 out_df1 = pandas.DataFrame() # all results | 38 smList2 = [] |
40 out_df2 = pandas.DataFrame() # filtered results based on 6 columns | 39 smList3 = [] |
41 out_df3 = pandas.DataFrame() # filtered results based on 3 columns | 40 for _, (smiles,) in in_df.iterrows(): |
41 with tempfile.NamedTemporaryFile() as out: | |
42 print("Working on compound: " + smiles) | |
43 if not re.search(r'\.', smiles): | |
44 subprocess.run(executable + argv + ["-ismi", smiles] + ["-ocsv", out.name]) | |
45 try: | |
46 bio_out = pandas.read_csv(out.name) | |
47 tmp2 = bio_out.drop_duplicates(subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"]) | |
48 tmp3 = bio_out.drop_duplicates(subset=["Molecular formula", "Major Isotope Mass", "ALogP"]) | |
42 | 49 |
43 tmp2 = pandas.DataFrame() | 50 smList1.append([smiles] * bio_out.shape[0]) |
44 tmp3 = pandas.DataFrame() | 51 smList2.append([smiles] * tmp2.shape[0]) |
52 smList3.append([smiles] * tmp3.shape[0]) | |
45 | 53 |
46 smList1 = [] # list with smiles string | 54 out_df1 = pandas.concat([out_df1, bio_out]) |
47 smList2 = [] | 55 out_df2 = pandas.concat([out_df2, tmp2]) |
48 smList3 = [] | 56 out_df3 = pandas.concat([out_df3, tmp3]) |
49 for _, (smiles,) in in_df.iterrows(): | 57 except pandas.errors.EmptyDataError: |
50 with tempfile.NamedTemporaryFile() as out: | 58 continue |
51 print("Working on compound: " + smiles) | 59 else: |
52 if not re.search(r'\.', smiles): | 60 print("ERROR: Input compound cannot be a mixture.") |
53 subprocess.run(executable + argv + ["-ismi", smiles] + ["-ocsv", out.name]) | 61 smList1 = sum(smList1, []) # merge sublists into one list |
54 try: | 62 smList2 = sum(smList2, []) |
55 tmp2 = pandas.read_csv(out.name) | 63 smList3 = sum(smList3, []) |
56 tmp3 = pandas.read_csv(out.name) | |
57 tmp2.drop_duplicates(inplace=True, subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"]) | |
58 tmp3.drop_duplicates(inplace=True, subset=["Molecular formula", "Major Isotope Mass", "ALogP"]) | |
59 smList2.append([smiles] * tmp2.shape[0]) | |
60 smList3.append([smiles] * tmp3.shape[0]) | |
61 out_df1 = pandas.concat([out_df1, pandas.read_csv(out.name)]) | |
62 out_df2 = pandas.concat([out_df2, tmp2]) | |
63 out_df3 = pandas.concat([out_df3, tmp3]) | |
64 smList1.append([smiles] * pandas.read_csv(out.name).shape[0]) | |
65 except pandas.errors.EmptyDataError: | |
66 continue | |
67 else: | |
68 print("ERROR: Input compound cannot be a mixture.") | |
69 smList1 = sum(smList1, []) # merge sublists into one list | |
70 smList2 = sum(smList2, []) | |
71 smList3 = sum(smList3, []) | |
72 | 64 |
73 out_df1.insert(0, "SMILES query", smList1) | 65 out_df1.insert(0, "SMILES query", smList1) |
74 out_df1.drop_duplicates(inplace=True) | 66 out_df1.insert(1, "SMILES target", InchiToSmiles(out_df1)) |
75 out_df1.insert(1, "SMILES target", InchiToSmiles(out_df1)) | 67 out_df1.to_csv(ocsv, sep ='\t') |
76 out_df1.to_csv(ocsv) | |
77 | 68 |
78 out_df2.insert(0, "SMILES query", smList2) | 69 out_df2.insert(0, "SMILES query", smList2) |
79 out_df3.insert(0, "SMILES query", smList3) | 70 out_df2.insert(1, "SMILES target", InchiToSmiles(out_df2)) |
80 out_df2.drop_duplicates(inplace=True) | 71 out_df2.to_csv(ocsv_dup, sep ='\t') |
81 out_df3.drop_duplicates(inplace=True) | 72 |
82 out_df2.insert(1, "SMILES target", InchiToSmiles(out_df2)) | 73 out_df3.insert(0, "SMILES query", smList3) |
83 out_df3.insert(1, "SMILES target", InchiToSmiles(out_df3)) | 74 out_df3.insert(1, "SMILES target", InchiToSmiles(out_df3)) |
84 # out_df.drop_duplicates(inplace=True, subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"]) | 75 out_df3.to_csv(ocsv_dup2, sep ='\t') |
85 out_df2.to_csv(ocsv_dup) | |
86 out_df3.to_csv(ocsv_dup2) | |
87 else: | |
88 # code = subprocess.run(executable + argv).returncode | |
89 # sys.exit(code) | |
90 subprocess.run(executable + argv) | |
91 smile = argv.pop(argv.index("-ismi") + 1) | |
92 tmp = pandas.DataFrame() | |
93 out = argv.pop(argv.index("-ocsv") + 1) | |
94 tmp = pandas.read_csv(out) # reads created output file | |
95 tmp.insert(0, "SMILES query", smile) # add SMILES string for query | |
96 tmp.insert(1, "SMILES target", InchiToSmiles(tmp)) # add SMILES string for target | |
97 tmp.to_csv(out) |