Mercurial > repos > recetox > aplcms_to_ramclustr_converter
comparison aplcms_to_ramclustr_converter.py @ 3:07667688735e draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
author | recetox |
---|---|
date | Wed, 17 Feb 2021 15:14:33 +0000 |
parents | |
children | 9ea34e24474f |
comparison
equal
deleted
inserted
replaced
2:644192cf22a5 | 3:07667688735e |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import sys | |
5 import warnings | |
6 | |
7 import pandas as pd | |
8 | |
9 | |
# Suppress every warning category for the whole run of the script.
warnings.simplefilter("ignore")

# Command-line interface: two optional flags select the HDF input and the
# table inside it; the single positional argument is the output path.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--dataframe",
    dest="dataframe",
    help="Name of hdf dataframe",
)
parser.add_argument(
    "--table",
    dest="table",
    help="Name of a table in the dataframe",
)
parser.add_argument("output")

# Parsed once at module level; main() reads this global.
args = parser.parse_args()
17 | |
18 | |
def extract_data(table):
    """Build a feature-by-sample frame from an apLCMS table.

    The first four columns of ``table`` are skipped; the remaining columns
    are treated as two equally sized groups and only the first group is
    used (the code calls these the intensities).

    Args:
        table: DataFrame that has ``mz`` and ``rt`` columns and whose
            columns from position 4 onwards are named ``<prefix>.<label>``.

    Returns:
        A new DataFrame with an ``mz_rt`` column (``str(mz) + "_" + str(rt)``)
        followed by one column per selected input column, named after the
        second dot-separated component of the original column name.

    Raises:
        KeyError: if ``mz`` or ``rt`` is missing from ``table``.
        IndexError: if a selected column name contains no ``.``.
    """
    # Half of whatever follows the four leading columns.
    sample_count = int((len(table.columns) - 4) / 2)
    first, last = 4, 4 + sample_count
    intensities = table.iloc[:, first:last]

    feature_ids = table['mz'].map(str) + "_" + table['rt'].map(str)
    ramclustr_data = pd.DataFrame({'mz_rt': feature_ids})

    # Positional access keeps the lookup unambiguous even if two input
    # columns happen to share a name.
    for position, column_name in enumerate(intensities.columns):
        sample_label = column_name.split('.')[1]
        ramclustr_data[sample_label] = intensities.iloc[:, position]

    return ramclustr_data
32 | |
33 | |
def format_table(ramclustr_data):
    """Reshape a feature-by-sample frame into a sample-by-feature table.

    The ``mz_rt`` column becomes the index, the frame is transposed so that
    each former column is a row, and the resulting row index is named
    ``sample``.

    The input frame is left untouched, so the function can safely be called
    more than once on the same object.

    Args:
        ramclustr_data: DataFrame containing an ``mz_rt`` column plus one
            column per sample.

    Returns:
        A new DataFrame indexed by ``sample`` with one column per ``mz_rt``
        value.

    Raises:
        KeyError: if ``ramclustr_data`` has no ``mz_rt`` column.
    """
    # Non-inplace set_index returns a new frame instead of stripping the
    # 'mz_rt' column from the caller's object.
    samples_by_feature = ramclustr_data.set_index('mz_rt').transpose()
    return samples_by_feature.rename_axis('sample', axis='index')
39 | |
40 | |
def main():
    """Convert the selected HDF table to a CSV file for RamClustR.

    Reads the table named by ``--table`` from the HDF file given by
    ``--dataframe``, reshapes it with ``extract_data`` and ``format_table``,
    writes the result to the ``output`` path as comma-separated text and
    prints a confirmation to stdout.

    Exits with status 1 and a message on stderr when reading the table
    raises ``KeyError``.
    """
    try:
        # ``errors`` takes the name of a codec error handler. The string
        # 'None' is not one, so a decoding problem would surface as an
        # unrelated "unknown error handler" LookupError. 'strict' is the
        # documented default.
        aplcms_table = pd.read_hdf(args.dataframe, args.table, errors='strict')
    except KeyError:
        print("Selected table does not exist in HDF dataframe", file=sys.stderr)
        sys.exit(1)

    ramclustr_table = format_table(extract_data(aplcms_table))
    ramclustr_table.to_csv(args.output, sep=',')

    msg = "Table '{}' of HDF dataset is converted to csv for RamClustR".format(args.table)
    print(msg, file=sys.stdout)
55 | |
56 | |
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()