Mercurial > repos > recetox > aplcms_to_ramclustr_converter
annotate hdf_converter.py @ 0:062f4c571a24 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
author | recetox |
---|---|
date | Tue, 15 Dec 2020 17:38:07 +0000 |
parents | |
children | 52470d439e50 |
rev | line source |
---|---|
0
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
1 #!/usr/bin/env python |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
2 |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
3 import optparse |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
4 import sys |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
5 import warnings |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
6 |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
7 import pandas as pd |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
8 |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
9 |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
# Ignore all warnings for the rest of the run.
warnings.simplefilter('ignore')

# Command-line interface: two named options; any remaining positional
# arguments are collected in ``args``.
parser = optparse.OptionParser()
for _flag, _description in (
    ("--dataframe", "Name of hdf dataframe"),
    ("--table", "Name of a table in the dataframe"),
):
    parser.add_option(_flag, help=_description)
options, args = parser.parse_args()
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
16 |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
17 |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
def extract_samples(table, num_samples, idx):
    """Build the ``mz_rt`` -> intensity table for a single sample.

    Columns are addressed by position: 4 leading feature columns, then
    ``num_samples`` intensity columns, then ``num_samples`` retention-time
    columns. Sample ``idx`` therefore has its intensity at ``4 + idx`` and
    its retention time at ``4 + num_samples + idx``.

    Args:
        table: Feature table containing an ``mz`` column. It is not modified.
        num_samples: Number of samples stored in ``table``.
        idx: Zero-based position of the sample to extract.

    Returns:
        A DataFrame indexed by ``mz_rt`` (strings of the form ``"<mz>_<rt>"``)
        with one column holding the sample's intensities. The column is named
        after the second dot-separated part of the intensity column's name.
        Rows whose retention time for this sample is missing are left out.

    Raises:
        IndexError: If the computed positions are outside the table's columns
            or the intensity column name contains no ``'.'``.
    """
    intensity_idx = 4 + idx
    rt_idx = 4 + num_samples + idx
    column_names = table.columns.tolist()
    rt_column = column_names[rt_idx]
    sample_name = column_names[intensity_idx].split('.')[1]

    # Filter into a new frame rather than dropna(inplace=True): the caller
    # passes the same table for every sample, so dropping rows in place
    # would remove features from all samples processed afterwards.
    detected = table.dropna(subset=[rt_column])

    mzrt = detected['mz'].map(str) + '_' + detected.iloc[:, rt_idx].map(str)
    intensity = detected.iloc[:, intensity_idx]
    mzrt_intensity = pd.DataFrame(
        {'mz_rt': mzrt, sample_name: intensity},
        columns=['mz_rt', sample_name],
    )
    return mzrt_intensity.set_index('mz_rt')
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
30 |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
31 |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
def join_samples(table):
    """Outer-join the per-sample tables of ``table`` on ``mz_rt``.

    The number of samples is derived from the column count, and each
    sample's table is obtained from ``extract_samples``.

    Args:
        table: Feature table with 4 leading columns followed by one
            intensity and one retention-time column per sample.

    Returns:
        A DataFrame with an ``mz_rt`` column and the columns contributed
        by each sample's table.
    """
    # 4 default columns: mz, rt, mz_min, mz_max. The rest is one intensity
    # and one rt column for each sample.
    column_count = len(table.columns)
    num_samples = (column_count - 4) // 2

    merged = pd.DataFrame(columns=['mz_rt'])
    for sample_idx in range(num_samples):
        per_sample = extract_samples(table, num_samples, sample_idx)
        merged = pd.merge(merged, per_sample, on='mz_rt', how='outer')
    return merged
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
39 |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
40 |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
def convert_to_RamClustR(RamClustr_data):
    """Reshape the joined feature table into a samples-by-features table.

    Missing values are replaced with 0, the ``mz_rt`` column becomes the
    column labels, and each remaining column becomes one row.

    Args:
        RamClustr_data: DataFrame with an ``mz_rt`` column and one column
            per sample. It is not modified.

    Returns:
        A new DataFrame with one row per sample column of the input, one
        column per ``mz_rt`` value, and an index named ``sample``.
    """
    # Chain non-mutating calls rather than inplace=True so the caller's
    # frame is left untouched.
    indexed = (
        RamClustr_data
        .fillna(0)
        .rename(columns={'mz_rt': 'sample'})
        .set_index('sample')
    )
    return indexed.transpose().rename_axis('sample')
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
48 |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
49 |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
def main():
    """Convert one table of an HDF file to a ';'-separated CSV file.

    Reads the table named by ``--table`` from the HDF file given by
    ``--dataframe``, joins and reshapes it, and writes the result to the
    path given as the first positional argument.

    Raises:
        SystemExit: If no output path was given, or if the selected table
            does not exist in the HDF file.
    """
    # Check the output path first so a missing argument is reported with a
    # clear message instead of an IndexError after the conversion has run.
    if not args:
        sys.exit("Output file path is missing from the command line")
    output = args[0]

    try:
        # NOTE(review): errors='None' passes the string 'None', not None;
        # it does not look like a valid error-handler name - confirm intent.
        aplcms_table = pd.read_hdf(options.dataframe, options.table, errors='None')
    except KeyError:
        sys.exit("Selected table does not exist in HDF dataframe")

    joined_samples = join_samples(aplcms_table)
    RamClustr_data = convert_to_RamClustR(joined_samples)
    RamClustr_data.to_csv(output, sep=';')
    print("Table '{}' of HDF dataset is converted to csv for RamClutsR".format(options.table))
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
61 |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
62 |
062f4c571a24
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
recetox
parents:
diff
changeset
|
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()