Mercurial > repos > recetox > aplcms_to_ramclustr_converter
annotate aplcms_to_ramclustr_converter.py @ 3:07667688735e draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
author | recetox |
---|---|
date | Wed, 17 Feb 2021 15:14:33 +0000 |
parents | |
children | 9ea34e24474f |
rev | line source |
---|---|
3
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
1 #!/usr/bin/env python |
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
2 |
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
3 import argparse |
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
4 import sys |
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
5 import warnings |
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
6 |
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
7 import pandas as pd |
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
8 |
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
9 |
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
# Suppress every warning category for the whole process.
warnings.simplefilter('ignore')

# Command-line interface.
# NOTE: the arguments are parsed at module level; the resulting ``args``
# namespace is a module global that ``main()`` reads.
parser = argparse.ArgumentParser(
    description="Convert a table of an HDF dataframe into a csv file."
)
# Required: without a path there is nothing to read, so fail with a usage
# error here rather than handing ``None`` to the HDF reader later on.
parser.add_argument("--dataframe", required=True, help="Name of hdf dataframe")
# Left optional on purpose: the value is forwarded as the ``key`` of
# ``pandas.read_hdf``, which accepts ``None``.
parser.add_argument("--table", help="Name of a table in the dataframe")
parser.add_argument('output', help="Path of the csv file to write")
args = parser.parse_args()
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
17 |
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
18 |
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
def _sample_label(column_label):
    """Return the second dot-separated field of a column label.

    Labels without any dot are returned unchanged instead of raising
    ``IndexError``.
    """
    parts = str(column_label).split('.')
    return parts[1] if len(parts) > 1 else parts[0]


def extract_data(table):
    """Build a feature-by-sample intensity table from ``table``.

    Parameters
    ----------
    table : pandas.DataFrame
        Must contain the columns ``'mz'`` and ``'rt'``. The columns from
        position 4 onwards are treated as two equally sized groups and only
        the first group is copied.
        NOTE(review): presumably the first group holds per-sample intensities
        and the second one per-sample values that are not needed here --
        confirm against the producer of the table.

    Returns
    -------
    pandas.DataFrame
        A column ``'mz_rt'`` (``"<mz>_<rt>"`` as strings) followed by one
        column per copied sample, named by the second dot-separated field of
        the source column label (or the whole label if it has no dot). If two
        source columns map to the same name, the later one overwrites the
        earlier one.
    """
    # Half of the columns after the four leading ones; never negative.
    num_samples = max(0, (len(table.columns) - 4) // 2)
    feature_ids = table['mz'].map(str) + "_" + table['rt'].map(str)

    intensities = table.iloc[:, 4:4 + num_samples]
    ramclustr_data = pd.DataFrame({'mz_rt': feature_ids})

    # Positional access keeps this correct even if source labels repeat.
    for position, column_label in enumerate(intensities.columns):
        ramclustr_data[_sample_label(column_label)] = intensities.iloc[:, position]

    return ramclustr_data
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
32 |
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
33 |
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
def format_table(ramclustr_data):
    """Return ``ramclustr_data`` transposed, with one row per sample.

    The ``'mz_rt'`` column becomes the column header of the result, every
    other input column becomes a row, and the row index is named
    ``'sample'``.

    Parameters
    ----------
    ramclustr_data : pandas.DataFrame
        Table with an ``'mz_rt'`` column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new, transposed frame whose index is named ``'sample'``.
    """
    # Non-inplace set_index so the caller's frame is left untouched.
    by_feature = ramclustr_data.set_index('mz_rt')
    return by_feature.transpose().rename_axis('sample')
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
39 |
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
40 |
07667688735e
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
recetox
parents:
diff
changeset
|
def main():
    """Read the selected HDF table, convert it and write it as csv.

    Reads the module-level ``args`` namespace: ``args.dataframe`` is the HDF
    file, ``args.table`` the key of the table inside it, and ``args.output``
    the csv file to write. If the key is missing from the HDF file, a message
    is printed to stderr and the process exits with status 1. On success a
    confirmation message is printed to stdout.
    """
    try:
        # The codec error handler is left at the pandas default ('strict');
        # the literal string 'None' is not a valid handler name.
        aplcms_table = pd.read_hdf(args.dataframe, args.table)
    except KeyError:
        msg = "Selected table does not exist in HDF dataframe"
        print(msg, file=sys.stderr)
        sys.exit(1)

    ramclustr_data = extract_data(aplcms_table)
    ramclustr_table = format_table(ramclustr_data)

    ramclustr_table.to_csv(args.output, sep=',')
    msg = "Table '{}' of HDF dataset is converted to csv for RamClutsR".format(args.table)
    print(msg, file=sys.stdout)


if __name__ == "__main__":
    main()