# HG changeset patch
# User recetox
# Date 1610553342 0
# Node ID 644192cf22a502b95a88ae099fff2dacd0bc463b
# Parent 52470d439e50d687f967dc2bb7be7fdbdb5d0400
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 5cdd2628a1a509b3e0ccc599eaab63d664bf031a"
diff -r 52470d439e50 -r 644192cf22a5 hdf_converter.py
--- a/hdf_converter.py Wed Dec 16 17:28:30 2020 +0000
+++ b/hdf_converter.py Wed Jan 13 15:55:42 2021 +0000
@@ -1,6 +1,6 @@
#!/usr/bin/env python
-import optparse
+import argparse
import sys
import warnings
@@ -9,55 +9,49 @@
warnings.simplefilter('ignore')
-parser = optparse.OptionParser()
-parser.add_option("--dataframe", help="Name of hdf dataframe")
-parser.add_option("--table", help="Name of a table in the dataframe")
-(options, args) = parser.parse_args()
+parser = argparse.ArgumentParser()
+parser.add_argument("--dataframe", help="Name of hdf dataframe")
+parser.add_argument("--table", help="Name of a table in the dataframe")
+parser.add_argument('output')
+args = parser.parse_args()
-def extract_samples(table, num_samples, idx):
- intensity_idx = 4 + idx
- rt_idx = 4 + num_samples + idx
- rt_idx_name = table.columns.tolist()[rt_idx]
- table.dropna(subset=[rt_idx_name], inplace=True)
- sample_name = table.columns.tolist()[intensity_idx].split('.')[1]
- mzrt = table['mz'].map(str) + '_' + table.iloc[:, rt_idx].map(str)
- intensity = table.iloc[:, intensity_idx]
- mzrt_intensity = {'mz_rt': mzrt, sample_name: intensity}
- mzrt_intensity = pd.DataFrame(mzrt_intensity, columns=['mz_rt', sample_name])
- mzrt_intensity.set_index('mz_rt', inplace=True)
- return mzrt_intensity
+def extract_data(table):
+ num_samples = int((len(table.columns.tolist()) - 4) / 2)
+ mz_rt = table['mz'].map(str) + "_" + table['rt'].map(str)
+
+ intensities = table.iloc[:, 4:(4 + num_samples)]
+ sample_labels = [label.split('.')[1] for label in intensities.columns.tolist()]
+ ramclustr_data = pd.DataFrame({'mz_rt': mz_rt})
+
+ for idx in range(num_samples):
+ label = sample_labels[idx]
+ ramclustr_data[label] = intensities.iloc[:, idx]
+
+ return ramclustr_data
-def join_samples(table):
- num_samples = int((len(table.columns.tolist()) - 4) / 2) # 4 default columns: mz,rt,mz_min,mz_max. The rest is intensity and rt columns for each sample
- RamClustr_data = pd.DataFrame(columns=['mz_rt'])
- for sample in range(num_samples):
- sample_data = extract_samples(table, num_samples, sample)
- RamClustr_data = pd.merge(RamClustr_data, sample_data, on='mz_rt', how='outer')
- return RamClustr_data
-
-
-def convert_to_RamClustR(RamClustr_data):
- RamClustr_data.fillna(0, inplace=True)
- RamClustr_data.rename(columns={'mz_rt': 'sample'}, inplace=True)
- RamClustr_data.set_index('sample', inplace=True)
- RamClustr_data_transposed = RamClustr_data.transpose()
- RamClustr_data_transposed.index.rename('sample', inplace=True)
- return RamClustr_data_transposed
+def format_table(ramclustr_data):
+ ramclustr_data.set_index('mz_rt', inplace=True)
+ ramclustr_data = ramclustr_data.transpose()
+ ramclustr_data.index.rename('sample', inplace=True)
+ return ramclustr_data
def main():
try:
- aplcms_table = pd.read_hdf(options.dataframe, options.table, errors='None')
+ aplcms_table = pd.read_hdf(args.dataframe, args.table, errors='None')
except KeyError:
- sys.exit("Selected table does not exist in HDF dataframe")
+ msg = "Selected table does not exist in HDF dataframe"
+ print(msg, file=sys.stderr)
+ sys.exit(1)
- RamClustr_data = join_samples(aplcms_table)
- RamClustr_data = convert_to_RamClustR(RamClustr_data)
- output = args[0]
- RamClustr_data.to_csv(output, sep=';')
- print("Table '{}' of HDF dataset is converted to csv for RamClutsR".format(options.table))
+ ramclustr_data = extract_data(aplcms_table)
+ ramclustr_table = format_table(ramclustr_data)
+
+ ramclustr_table.to_csv(args.output, sep=',')
+ msg = "Table '{}' of HDF dataset is converted to csv for RamClutsR".format(args.table)
+ print(msg, file=sys.stdout)
if __name__ == "__main__":
diff -r 52470d439e50 -r 644192cf22a5 hdf_converter.xml
--- a/hdf_converter.xml Wed Dec 16 17:28:30 2020 +0000
+++ b/hdf_converter.xml Wed Jan 13 15:55:42 2021 +0000
@@ -20,9 +20,9 @@
-
+
-
+