annotate hdf_converter.py @ 0:062f4c571a24 draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
author recetox
date Tue, 15 Dec 2020 17:38:07 +0000
parents
children 52470d439e50
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
1 #!/usr/bin/env python
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
2
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
3 import optparse
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
4 import sys
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
5 import warnings
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
6
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
7 import pandas as pd
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
8
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
9
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
# Silence every warning category for the rest of the run.
warnings.simplefilter('ignore')

# Command-line interface: two named options; any remaining positional
# arguments end up in ``args`` (main() uses the first one as the output path).
# Parsing happens at import time, so ``options`` and ``args`` are module globals.
parser = optparse.OptionParser()
parser.add_option("--dataframe", help="Name of hdf dataframe")
parser.add_option("--table", help="Name of a table in the dataframe")
options, args = parser.parse_args()
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
16
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
17
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
def extract_samples(table, num_samples, idx):
    """Build the ``mz_rt`` -> intensity table of a single sample.

    Columns are addressed by position: four leading feature columns, then
    ``num_samples`` intensity columns, then ``num_samples`` retention-time
    columns. Sample ``idx`` therefore owns column ``4 + idx`` (intensity)
    and column ``4 + num_samples + idx`` (retention time).

    Args:
        table: DataFrame with an ``mz`` column and the positional layout
            described above. It is left unmodified.
        num_samples: Number of samples stored in ``table``.
        idx: Zero-based position of the sample to extract.

    Returns:
        DataFrame indexed by ``mz_rt`` (strings of the form ``"<mz>_<rt>"``)
        with a single column named after the sample. Only rows whose
        retention time for this sample is not missing are included.

    Raises:
        IndexError: If the intensity column name contains no ``'.'`` or the
            computed column positions fall outside ``table``.
    """
    column_names = table.columns.tolist()
    intensity_idx = 4 + idx
    rt_idx = 4 + num_samples + idx

    # Filter into a new frame. An in-place dropna would also remove these
    # rows from the caller's table, and therefore from every sample that is
    # extracted from the same table afterwards.
    detected = table.dropna(subset=[column_names[rt_idx]])

    # The sample name is the second dot-separated field of the intensity
    # column name.
    sample_name = column_names[intensity_idx].split('.')[1]

    mzrt = detected['mz'].map(str) + '_' + detected.iloc[:, rt_idx].map(str)
    mzrt_intensity = pd.DataFrame(
        {'mz_rt': mzrt, sample_name: detected.iloc[:, intensity_idx]},
        columns=['mz_rt', sample_name],
    )
    return mzrt_intensity.set_index('mz_rt')
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
30
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
31
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
def join_samples(table):
    """Merge the per-sample intensity tables of all samples into one table.

    Args:
        table: DataFrame with 4 default columns (mz, rt, mz_min, mz_max)
            followed by the intensity and rt columns of each sample.

    Returns:
        DataFrame with an ``mz_rt`` column plus the columns contributed by
        each sample, combined with successive outer joins on ``mz_rt``.
    """
    # Everything after the 4 default columns is an intensity column and an
    # rt column per sample, hence the division by two.
    sample_count = int((len(table.columns) - 4) / 2)

    merged = pd.DataFrame(columns=['mz_rt'])
    for position in range(sample_count):
        merged = merged.merge(
            extract_samples(table, sample_count, position),
            on='mz_rt',
            how='outer',
        )
    return merged
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
39
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
40
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
def convert_to_RamClustR(RamClustr_data):
    """Reshape the joined table into a samples-by-features table.

    Missing values are replaced with 0, the ``mz_rt`` column becomes the
    column header, and the table is transposed so that every row is one of
    the remaining input columns and every column is one ``mz_rt`` value.

    Args:
        RamClustr_data: DataFrame with an ``mz_rt`` column and one column
            per sample. It is left unmodified, so the function can be
            called repeatedly on the same frame.

    Returns:
        Transposed DataFrame whose index and column axis are both named
        ``sample``.
    """
    # Chain non-mutating calls: the in-place variants would rewrite the
    # caller's frame (drop its ``mz_rt`` column, replace its index).
    by_feature = (
        RamClustr_data
        .fillna(0)
        .rename(columns={'mz_rt': 'sample'})
        .set_index('sample')
    )
    RamClustr_data_transposed = by_feature.transpose()
    # After the transpose the former column labels form the index; label it
    # so it is written with a header name.
    RamClustr_data_transposed.index.name = 'sample'
    return RamClustr_data_transposed
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
48
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
49
062f4c571a24 "planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff changeset
def main():
    """Convert one table of an HDF dataset into a ';'-separated csv file.

    Reads the table named by ``--table`` from the HDF file given by
    ``--dataframe``, joins the per-sample data, reshapes it with
    ``convert_to_RamClustR`` and writes the result to the path given as the
    first positional command-line argument.

    Exits with an error message when the output path or ``--dataframe`` is
    missing, or when the selected table does not exist in the HDF file.
    """
    # Validate the command line before doing any work, so a missing argument
    # is reported clearly instead of surfacing as an IndexError after the
    # whole conversion has already run.
    if not args:
        sys.exit("Output file path is missing")
    if options.dataframe is None:
        sys.exit("Option --dataframe is missing")
    output = args[0]

    try:
        # NOTE(review): errors='None' passes the *string* 'None', not the
        # None object - confirm this is the intended value.
        aplcms_table = pd.read_hdf(options.dataframe, options.table, errors='None')
    except KeyError:
        sys.exit("Selected table does not exist in HDF dataframe")

    joined_data = join_samples(aplcms_table)
    RamClustr_data = convert_to_RamClustR(joined_data)
    RamClustr_data.to_csv(output, sep=';')
    print("Table '{}' of HDF dataset is converted to csv for RamClustR".format(options.table))


if __name__ == "__main__":
    main()