Mercurial > repos > recetox > aplcms_to_ramclustr_converter

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aplcms_to_ramclustr_converter.py	Wed Feb 17 15:14:33 2021 +0000
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+"""Convert an apLCMS HDF table into the CSV layout read by RamClustR."""
+
+import argparse
+import sys
+import warnings
+
+import pandas as pd
+
+
+warnings.simplefilter('ignore')
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--dataframe", help="Name of hdf dataframe")
+parser.add_argument("--table", help="Name of a table in the dataframe")
+parser.add_argument('output')
+args = parser.parse_args()
+
+
+def extract_data(table):
+    """Collect the per-sample intensity columns keyed by ``mz_rt``.
+
+    The first four columns are skipped and the remaining ones are split in
+    half; only the first half is kept (presumably the intensity columns,
+    one per sample). Each kept column label must contain a dot: the text
+    between the first and second dot becomes the sample name.
+    """
+    num_samples = (len(table.columns) - 4) // 2
+    mz_rt = table['mz'].map(str) + "_" + table['rt'].map(str)
+
+    intensities = table.iloc[:, 4:(4 + num_samples)]
+    sample_labels = [label.split('.')[1] for label in intensities.columns]
+    ramclustr_data = pd.DataFrame({'mz_rt': mz_rt})
+
+    for idx, label in enumerate(sample_labels):
+        ramclustr_data[label] = intensities.iloc[:, idx]
+
+    return ramclustr_data
+
+
+def format_table(ramclustr_data):
+    """Return the table transposed so that every row is one sample.
+
+    The ``mz_rt`` values become the column headers and the row index is
+    named ``sample``. The input frame is not modified.
+    """
+    ramclustr_table = ramclustr_data.set_index('mz_rt').transpose()
+    ramclustr_table.index.rename('sample', inplace=True)
+    return ramclustr_table
+
+
+def main():
+    """Read the selected HDF table, convert it and write the CSV file."""
+    try:
+        # Rely on the default ``errors='strict'``; the string 'None' is not
+        # a registered codec error handler.
+        aplcms_table = pd.read_hdf(args.dataframe, args.table)
+    except KeyError:
+        msg = "Selected table does not exist in HDF dataframe"
+        print(msg, file=sys.stderr)
+        sys.exit(1)
+
+    ramclustr_data = extract_data(aplcms_table)
+    ramclustr_table = format_table(ramclustr_data)
+
+    ramclustr_table.to_csv(args.output, sep=',')
+    msg = "Table '{}' of HDF dataset is converted to csv for RamClustR".format(args.table)
+    print(msg, file=sys.stdout)
+
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aplcms_to_ramclustr_converter.xml	Wed Feb 17 15:14:33 2021 +0000
@@ -0,0 +1,37 @@
+<tool id="aplcms_to_ramclustr_converter" name="apLCMS to RamClustR converter" version="0.0.1+galaxy0" python_template_version="3.5">
+    <description>converts aplcms HDF output to RamClustR csv input</description>
+    <requirements>
+        <requirement type="package" version="3.7">python</requirement>
+        <requirement type="package" version="1.1.5">pandas</requirement>
+        <requirement type="package" version="3.6.1">pytables</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        python $__tool_directory__/aplcms_to_ramclustr_converter.py --dataframe '${hdf_dataframe}' --table '${table_name}' '${out_file}'
+    ]]></command>
+    <inputs>
+        <param name="hdf_dataframe" type="data" format="h5" label="apLCMS Dataset"/>
+        <param name="table_name" type="select" label="Table Name">
+            <option value="peaks">peaks</option>
+            <option value="aligned_peaks">aligned peaks</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="csv" name="out_file" label="CSV with ${table_name} from ${hdf_dataframe.name}"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="hdf_dataframe" value="test-input.h5" ftype="h5"/>
+            <param name="table_name" value="peaks"/>
+            <output name="out_file" file="test-output.csv" ftype="csv"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+        Tool to convert apLCMS output to a CSV file formatted in accordance with RamClustR (CSV) input requirements.
+
+        **Workflow position**
+
+        - Upstream tool: `apLCMS <https://github.com/RECETOX/galaxytools/tree/master/tools/aplcms>`_  Unsupervised or Hybrid
+
+        - Downstream tool: `RamClustR <https://github.com/RECETOX/galaxytools/tree/master/tools/ramclustr>`_ (CSV)
+    ]]></help>
+</tool>
--- a/hdf_converter.py	Wed Jan 13 15:55:42 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-#!/usr/bin/env python
-
-import argparse
-import sys
-import warnings
-
-import pandas as pd
-
-
-warnings.simplefilter('ignore')
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--dataframe", help="Name of hdf dataframe")
-parser.add_argument("--table", help="Name of a table in the dataframe")
-parser.add_argument('output')
-args = parser.parse_args()
-
-
-def extract_data(table):
-    num_samples = int((len(table.columns.tolist()) - 4) / 2)
-    mz_rt = table['mz'].map(str) + "_" + table['rt'].map(str)
-
-    intensities = table.iloc[:, 4:(4 + num_samples)]
-    sample_labels = [label.split('.')[1] for label in intensities.columns.tolist()]
-    ramclustr_data = pd.DataFrame({'mz_rt': mz_rt})
-
-    for idx in range(num_samples):
-        label = sample_labels[idx]
-        ramclustr_data[label] = intensities.iloc[:, idx]
-
-    return ramclustr_data
-
-
-def format_table(ramclustr_data):
-    ramclustr_data.set_index('mz_rt', inplace=True)
-    ramclustr_data = ramclustr_data.transpose()
-    ramclustr_data.index.rename('sample', inplace=True)
-    return ramclustr_data
-
-
-def main():
-    try:
-        aplcms_table = pd.read_hdf(args.dataframe, args.table, errors='None')
-    except KeyError:
-        msg = "Selected table does not exist in HDF dataframe"
-        print(msg, file=sys.stderr)
-        sys.exit(1)
-
-    ramclustr_data = extract_data(aplcms_table)
-    ramclustr_table = format_table(ramclustr_data)
-
-    ramclustr_table.to_csv(args.output, sep=',')
-    msg = "Table '{}' of HDF dataset is converted to csv for RamClutsR".format(args.table)
-    print(msg, file=sys.stdout)
-
-
-if __name__ == "__main__":
-    main()
--- a/hdf_converter.xml	Wed Jan 13 15:55:42 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-<tool id="aplcms_to_ramclustr_converter" name="apLCMS to RamClustR converter" version="0.0.1" python_template_version="3.5">
-    <description>converts aplcms HDF output to RamClustR csv input</description>
-    <requirements>
-        <requirement type="package" version="3.7">python</requirement>
-        <requirement type="package" version="1.1.5">pandas</requirement>
-        <requirement type="package" version="3.6.1">pytables</requirement>
-    </requirements>
-    <command detect_errors="exit_code"><![CDATA[
-        python $__tool_directory__/hdf_converter.py --dataframe '${hdf_dataframe}' --table '${table_name}' '${out_file}'
-    ]]></command>
-    <inputs>
-        <param name="hdf_dataframe" type="data" format="h5" label="apLCMS Dataset"/>
-        <param name="table_name" type="select" label="Table Name">
-            <option value="peaks">peaks</option>
-            <option value="aligned_peaks">aligned peaks</option>
-        </param>
-    </inputs>
-    <outputs>
-        <data format="csv" name="out_file" label="CSV with ${table_name} from ${hdf_dataframe.name}"/>
-    </outputs>
-    <tests>
-        <test>
-            <param name="hdf_dataframe" value="test-input.h5" ftype="h5"/>
-            <param name="table_name" value="peaks"/>
-            <output name="out_file" file="test-output.csv" ftype="csv"/>
-        </test>
-    </tests>
-    <help><![CDATA[
-        Tool to convert apLCMS output to csv file formated in accordance to RamClustR (CSV) input requirements.
-
-        **Workflow position**
-
-        - Upstream tool: `apLCMS <https://github.com/RECETOX/galaxytools/tree/master/tools/aplcms>`_  Unsupervised or Hybrid
-
-        - Downstream tool: `RamClustR <https://github.com/RECETOX/galaxytools/tree/master/tools/ramclustr>`_ (CSV)
-    ]]></help>
-</tool>
\ No newline at end of file