annotate hdf_converter.py @ 2:644192cf22a5 draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 5cdd2628a1a509b3e0ccc599eaab63d664bf031a"
author recetox
date Wed, 13 Jan 2021 15:55:42 +0000
parents 52470d439e50
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
1 #!/usr/bin/env python
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
2
2
644192cf22a5 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 5cdd2628a1a509b3e0ccc599eaab63d664bf031a"
recetox
parents: 1
diff changeset
3 import argparse
0
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
4 import sys
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
5 import warnings
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
6
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
7 import pandas as pd
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
8
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
9
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
# Suppress every warning for the rest of the process.
warnings.simplefilter('ignore')

# Command-line interface, parsed once at module level and read by main().
# Both options are required: main() hands them straight to pd.read_hdf(),
# so a missing value should be reported as a usage error by argparse rather
# than as an unrelated pandas failure.
parser = argparse.ArgumentParser()
parser.add_argument("--dataframe", required=True, help="Name of hdf dataframe")
parser.add_argument("--table", required=True, help="Name of a table in the dataframe")
parser.add_argument('output', help="Path of the csv file to write")
args = parser.parse_args()
0
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
17
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
18
2
644192cf22a5 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 5cdd2628a1a509b3e0ccc599eaab63d664bf031a"
recetox
parents: 1
diff changeset
def extract_data(table):
    """Build a table of ``mz_rt`` identifiers and per-sample values.

    The first four columns of ``table`` are skipped. Of the remaining
    columns only the first half is read; the local names call these values
    intensities.

    Args:
        table: pandas DataFrame with ``mz`` and ``rt`` columns. The labels of
            the columns that are read must contain at least one ``.``.

    Returns:
        A new DataFrame with a ``mz_rt`` column (``mz`` and ``rt`` converted
        to strings and joined with ``_``) followed by one column per read
        column, named with the second dot-separated field of its label.

    Raises:
        KeyError: if ``table`` has no ``mz`` or ``rt`` column.
        IndexError: if the label of a read column contains no ``.``.
    """
    # Presumably each sample contributes two columns after the four leading
    # ones (TODO confirm against the producer of the table); only the first
    # half of them is used. Integer arithmetic avoids a float round trip.
    num_samples = max(len(table.columns) - 4, 0) // 2
    mz_rt = table['mz'].map(str) + "_" + table['rt'].map(str)

    intensities = table.iloc[:, 4:(4 + num_samples)]
    ramclustr_data = pd.DataFrame({'mz_rt': mz_rt})

    # Walk the columns directly instead of indexing two parallel sequences.
    for label, values in intensities.items():
        # Keep only the second dot-separated field of the label.
        ramclustr_data[label.split('.')[1]] = values

    return ramclustr_data
0
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
32
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
33
2
644192cf22a5 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 5cdd2628a1a509b3e0ccc599eaab63d664bf031a"
recetox
parents: 1
diff changeset
def format_table(ramclustr_data):
    """Transpose the feature table so that each row is one sample.

    Args:
        ramclustr_data: pandas DataFrame with a ``mz_rt`` column; every other
            column becomes a row of the result. The argument is not modified.

    Returns:
        A new DataFrame whose index holds the former column labels under the
        name ``sample`` and whose columns are the former ``mz_rt`` values.

    Raises:
        KeyError: if ``ramclustr_data`` has no ``mz_rt`` column.
    """
    # Chain non-mutating calls; set_index(..., inplace=True) would also alter
    # the DataFrame that the caller still holds.
    return ramclustr_data.set_index('mz_rt').transpose().rename_axis('sample')
0
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
39
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
40
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
def main():
    """Convert one table of an HDF file into a csv file.

    Reads the table ``args.table`` from the HDF file ``args.dataframe``,
    reshapes it with extract_data() and format_table(), writes the result to
    ``args.output`` as comma-separated text and prints a confirmation to
    stdout.

    If pd.read_hdf() raises KeyError, an error message is printed to stderr
    and the process exits with status 1.
    """
    try:
        # Rely on the default errors='strict': the literal string 'None' is
        # not the name of a codec error handler and would itself fail with a
        # LookupError as soon as a decoding error had to be handled.
        aplcms_table = pd.read_hdf(args.dataframe, args.table)
    except KeyError:
        msg = "Selected table does not exist in HDF dataframe"
        print(msg, file=sys.stderr)
        sys.exit(1)

    ramclustr_data = extract_data(aplcms_table)
    ramclustr_table = format_table(ramclustr_data)

    ramclustr_table.to_csv(args.output, sep=',')
    msg = "Table '{}' of HDF dataset is converted to csv for RamClustR".format(args.table)
    print(msg, file=sys.stdout)
0
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
55
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
56
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
# Run the conversion when the file is executed as a script.
if __name__ == "__main__":
    main()