Mercurial > repos > recetox > aplcms_to_ramclustr_converter
diff aplcms_to_ramclustr_converter.py @ 3:07667688735e draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
author | recetox |
---|---|
date | Wed, 17 Feb 2021 15:14:33 +0000 |
parents | |
children | 9ea34e24474f |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/aplcms_to_ramclustr_converter.py Wed Feb 17 15:14:33 2021 +0000 @@ -0,0 +1,58 @@ +#!/usr/bin/env python + +import argparse +import sys +import warnings + +import pandas as pd + + +warnings.simplefilter('ignore') + +parser = argparse.ArgumentParser() +parser.add_argument("--dataframe", help="Name of hdf dataframe") +parser.add_argument("--table", help="Name of a table in the dataframe") +parser.add_argument('output') +args = parser.parse_args() + + +def extract_data(table): + num_samples = int((len(table.columns.tolist()) - 4) / 2) + mz_rt = table['mz'].map(str) + "_" + table['rt'].map(str) + + intensities = table.iloc[:, 4:(4 + num_samples)] + sample_labels = [label.split('.')[1] for label in intensities.columns.tolist()] + ramclustr_data = pd.DataFrame({'mz_rt': mz_rt}) + + for idx in range(num_samples): + label = sample_labels[idx] + ramclustr_data[label] = intensities.iloc[:, idx] + + return ramclustr_data + + +def format_table(ramclustr_data): + ramclustr_data.set_index('mz_rt', inplace=True) + ramclustr_data = ramclustr_data.transpose() + ramclustr_data.index.rename('sample', inplace=True) + return ramclustr_data + + +def main(): + try: + aplcms_table = pd.read_hdf(args.dataframe, args.table, errors='None') + except KeyError: + msg = "Selected table does not exist in HDF dataframe" + print(msg, file=sys.stderr) + sys.exit(1) + + ramclustr_data = extract_data(aplcms_table) + ramclustr_table = format_table(ramclustr_data) + + ramclustr_table.to_csv(args.output, sep=',') + msg = "Table '{}' of HDF dataset is converted to csv for RamClutsR".format(args.table) + print(msg, file=sys.stdout) + + +if __name__ == "__main__": + main()