Mercurial > repos > recetox > aplcms_to_ramclustr_converter
changeset 3:07667688735e draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
author | recetox |
---|---|
date | Wed, 17 Feb 2021 15:14:33 +0000 |
parents | 644192cf22a5 |
children | 9ea34e24474f |
files | aplcms_to_ramclustr_converter.py aplcms_to_ramclustr_converter.xml hdf_converter.py hdf_converter.xml |
diffstat | 4 files changed, 95 insertions(+), 95 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# aplcms_to_ramclustr_converter.py
"""Convert one table of an apLCMS HDF dataset into a CSV file for RamClustR.

Usage:
    aplcms_to_ramclustr_converter.py --dataframe FILE --table NAME OUTPUT
"""

import argparse
import sys
import warnings

import pandas as pd


# Suppress all warnings for the whole run of the script.
warnings.simplefilter('ignore')

parser = argparse.ArgumentParser()
parser.add_argument("--dataframe", help="Name of hdf dataframe")
parser.add_argument("--table", help="Name of a table in the dataframe")
parser.add_argument('output')

# Positional index of the first column that is copied as sample data.
# NOTE(review): the four leading columns are presumably per-feature metadata
# (they are skipped here; 'mz' and 'rt' are read by name) - confirm against
# the apLCMS output format.
_FIRST_SAMPLE_COLUMN = 4


def extract_data(table):
    """Return a frame with an 'mz_rt' key column plus one column per sample.

    The number of samples is taken to be half of the columns that follow the
    first four; only the first half of those columns is copied. Every copied
    column is renamed to the second '.'-separated part of its original label.

    Args:
        table: DataFrame holding 'mz' and 'rt' columns and, from positional
            column 4 onwards, the per-sample columns.

    Returns:
        A new DataFrame whose 'mz_rt' column is "<mz>_<rt>" for each row,
        followed by the copied sample columns.

    Raises:
        KeyError: if 'mz' or 'rt' is missing from ``table``.
        IndexError: if a copied column label contains no '.'.
    """
    num_samples = (len(table.columns) - _FIRST_SAMPLE_COLUMN) // 2
    mz_rt = table['mz'].map(str) + "_" + table['rt'].map(str)

    intensities = table.iloc[
        :, _FIRST_SAMPLE_COLUMN:_FIRST_SAMPLE_COLUMN + num_samples
    ]
    sample_labels = [label.split('.')[1] for label in intensities.columns]

    ramclustr_data = pd.DataFrame({'mz_rt': mz_rt})
    # Columns are copied by position so that duplicated source labels cannot
    # select more than one column at a time.
    for idx, label in enumerate(sample_labels):
        ramclustr_data[label] = intensities.iloc[:, idx]

    return ramclustr_data


def format_table(ramclustr_data):
    """Return the data transposed: one row per sample, one column per feature.

    The 'mz_rt' column becomes the column header and the resulting row index
    is named 'sample'. The input frame is left unmodified.

    Args:
        ramclustr_data: DataFrame with an 'mz_rt' column, as produced by
            ``extract_data``.

    Returns:
        A new, transposed DataFrame.
    """
    return ramclustr_data.set_index('mz_rt').transpose().rename_axis('sample')


def main(argv=None):
    """Read the selected HDF table, convert it and write the CSV output.

    Args:
        argv: Optional list of command-line arguments; ``sys.argv[1:]`` is
            used when it is None.

    Exits with status 1 when the requested table is not in the HDF file.
    """
    # Parsing happens here rather than at import time so that importing the
    # module has no dependency on the process command line.
    args = parser.parse_args(argv)

    try:
        # NOTE(review): 'None' is passed as a string, which is not one of the
        # standard codec error-handler names - confirm whether the default
        # ('strict') was intended.
        aplcms_table = pd.read_hdf(args.dataframe, args.table, errors='None')
    except KeyError:
        msg = "Selected table does not exist in HDF dataframe"
        print(msg, file=sys.stderr)
        sys.exit(1)

    ramclustr_data = extract_data(aplcms_table)
    ramclustr_table = format_table(ramclustr_data)

    ramclustr_table.to_csv(args.output, sep=',')
    msg = "Table '{}' of HDF dataset is converted to csv for RamClustR".format(args.table)
    print(msg, file=sys.stdout)


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/aplcms_to_ramclustr_converter.xml Wed Feb 17 15:14:33 2021 +0000 @@ -0,0 +1,37 @@ +<tool id="aplcms_to_ramclustr_converter" name="apLCMS to RamClustR converter" version="0.0.1+galaxy0" python_template_version="3.5"> + <description>converts aplcms HDF output to RamClustR csv input</description> + <requirements> + <requirement type="package" version="3.7">python</requirement> + <requirement type="package" version="1.1.5">pandas</requirement> + <requirement type="package" version="3.6.1">pytables</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + python $__tool_directory__/aplcms_to_ramclustr_converter.py --dataframe '${hdf_dataframe}' --table '${table_name}' '${out_file}' + ]]></command> + <inputs> + <param name="hdf_dataframe" type="data" format="h5" label="apLCMS Dataset"/> + <param name="table_name" type="select" label="Table Name"> + <option value="peaks">peaks</option> + <option value="aligned_peaks">aligned peaks</option> + </param> + </inputs> + <outputs> + <data format="csv" name="out_file" label="CSV with ${table_name} from ${hdf_dataframe.name}"/> + </outputs> + <tests> + <test> + <param name="hdf_dataframe" value="test-input.h5" ftype="h5"/> + <param name="table_name" value="peaks"/> + <output name="out_file" file="test-output.csv" ftype="csv"/> + </test> + </tests> + <help><![CDATA[ + Tool to convert apLCMS output to a CSV file formatted in accordance with RamClustR (CSV) input requirements. + + **Workflow position** + + - Upstream tool: `apLCMS <https://github.com/RECETOX/galaxytools/tree/master/tools/aplcms>`_ Unsupervised or Hybrid + + - Downstream tool: `RamClustR <https://github.com/RECETOX/galaxytools/tree/master/tools/ramclustr>`_ (CSV) + ]]></help> +</tool>
#!/usr/bin/env python
# hdf_converter.py
"""Convert one table of an apLCMS HDF dataset into a CSV file for RamClustR.

Usage:
    hdf_converter.py --dataframe FILE --table NAME OUTPUT
"""

import argparse
import sys
import warnings

import pandas as pd


# Suppress all warnings for the whole run of the script.
warnings.simplefilter('ignore')

parser = argparse.ArgumentParser()
parser.add_argument("--dataframe", help="Name of hdf dataframe")
parser.add_argument("--table", help="Name of a table in the dataframe")
parser.add_argument('output')

# Positional index of the first column that is copied as sample data.
# NOTE(review): the four leading columns are presumably per-feature metadata
# (they are skipped here; 'mz' and 'rt' are read by name) - confirm against
# the apLCMS output format.
_FIRST_SAMPLE_COLUMN = 4


def extract_data(table):
    """Return a frame with an 'mz_rt' key column plus one column per sample.

    The number of samples is taken to be half of the columns that follow the
    first four; only the first half of those columns is copied. Every copied
    column is renamed to the second '.'-separated part of its original label.

    Args:
        table: DataFrame holding 'mz' and 'rt' columns and, from positional
            column 4 onwards, the per-sample columns.

    Returns:
        A new DataFrame whose 'mz_rt' column is "<mz>_<rt>" for each row,
        followed by the copied sample columns.

    Raises:
        KeyError: if 'mz' or 'rt' is missing from ``table``.
        IndexError: if a copied column label contains no '.'.
    """
    num_samples = (len(table.columns) - _FIRST_SAMPLE_COLUMN) // 2
    mz_rt = table['mz'].map(str) + "_" + table['rt'].map(str)

    intensities = table.iloc[
        :, _FIRST_SAMPLE_COLUMN:_FIRST_SAMPLE_COLUMN + num_samples
    ]
    sample_labels = [label.split('.')[1] for label in intensities.columns]

    ramclustr_data = pd.DataFrame({'mz_rt': mz_rt})
    # Columns are copied by position so that duplicated source labels cannot
    # select more than one column at a time.
    for idx, label in enumerate(sample_labels):
        ramclustr_data[label] = intensities.iloc[:, idx]

    return ramclustr_data


def format_table(ramclustr_data):
    """Return the data transposed: one row per sample, one column per feature.

    The 'mz_rt' column becomes the column header and the resulting row index
    is named 'sample'. The input frame is left unmodified.

    Args:
        ramclustr_data: DataFrame with an 'mz_rt' column, as produced by
            ``extract_data``.

    Returns:
        A new, transposed DataFrame.
    """
    return ramclustr_data.set_index('mz_rt').transpose().rename_axis('sample')


def main(argv=None):
    """Read the selected HDF table, convert it and write the CSV output.

    Args:
        argv: Optional list of command-line arguments; ``sys.argv[1:]`` is
            used when it is None.

    Exits with status 1 when the requested table is not in the HDF file.
    """
    # Parsing happens here rather than at import time so that importing the
    # module has no dependency on the process command line.
    args = parser.parse_args(argv)

    try:
        # NOTE(review): 'None' is passed as a string, which is not one of the
        # standard codec error-handler names - confirm whether the default
        # ('strict') was intended.
        aplcms_table = pd.read_hdf(args.dataframe, args.table, errors='None')
    except KeyError:
        msg = "Selected table does not exist in HDF dataframe"
        print(msg, file=sys.stderr)
        sys.exit(1)

    ramclustr_data = extract_data(aplcms_table)
    ramclustr_table = format_table(ramclustr_data)

    ramclustr_table.to_csv(args.output, sep=',')
    msg = "Table '{}' of HDF dataset is converted to csv for RamClustR".format(args.table)
    print(msg, file=sys.stdout)


if __name__ == "__main__":
    main()
--- a/hdf_converter.xml Wed Jan 13 15:55:42 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ -<tool id="aplcms_to_ramclustr_converter" name="apLCMS to RamClustR converter" version="0.0.1" python_template_version="3.5"> - <description>converts aplcms HDF output to RamClustR csv input</description> - <requirements> - <requirement type="package" version="3.7">python</requirement> - <requirement type="package" version="1.1.5">pandas</requirement> - <requirement type="package" version="3.6.1">pytables</requirement> - </requirements> - <command detect_errors="exit_code"><![CDATA[ - python $__tool_directory__/hdf_converter.py --dataframe '${hdf_dataframe}' --table '${table_name}' '${out_file}' - ]]></command> - <inputs> - <param name="hdf_dataframe" type="data" format="h5" label="apLCMS Dataset"/> - <param name="table_name" type="select" label="Table Name"> - <option value="peaks">peaks</option> - <option value="aligned_peaks">aligned peaks</option> - </param> - </inputs> - <outputs> - <data format="csv" name="out_file" label="CSV with ${table_name} from ${hdf_dataframe.name}"/> - </outputs> - <tests> - <test> - <param name="hdf_dataframe" value="test-input.h5" ftype="h5"/> - <param name="table_name" value="peaks"/> - <output name="out_file" file="test-output.csv" ftype="csv"/> - </test> - </tests> - <help><![CDATA[ - Tool to convert apLCMS output to a CSV file formatted in accordance with RamClustR (CSV) input requirements. - - **Workflow position** - - - Upstream tool: `apLCMS <https://github.com/RECETOX/galaxytools/tree/master/tools/aplcms>`_ Unsupervised or Hybrid - - - Downstream tool: `RamClustR <https://github.com/RECETOX/galaxytools/tree/master/tools/ramclustr>`_ (CSV) - ]]></help> -</tool> \ No newline at end of file