Mercurial > repos > recetox > aplcms_to_ramclustr_converter
changeset 0:062f4c571a24 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/hdf_converter/ commit 7c15608bc9e6d0cc28daed590341b2b22f9fcedf"
author | recetox |
---|---|
date | Tue, 15 Dec 2020 17:38:07 +0000 |
parents | |
children | 52470d439e50 |
files | hdf_converter.py hdf_converter.xml test-data/test.csv test-data/test.h5 |
diffstat | 4 files changed, 105 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
"""hdf_converter.py: convert an apLCMS table stored in an HDF file to a
';'-separated CSV laid out for RamClustR.

Usage:
    hdf_converter.py --dataframe FILE.h5 --table TABLE_NAME OUTPUT.csv
"""

import optparse
import sys
import warnings

import pandas as pd


warnings.simplefilter('ignore')

# Leading columns that are not per-sample data: mz, rt, mz_min, mz_max.
# The rest is one intensity column and one rt column for each sample.
NUM_DEFAULT_COLUMNS = 4

# Building the parser has no side effects; the command line itself is only
# parsed inside main() so that importing this module never touches sys.argv.
parser = optparse.OptionParser(
    usage="%prog --dataframe FILE --table TABLE_NAME OUTPUT_CSV"
)
parser.add_option("--dataframe", help="Name of hdf dataframe")
parser.add_option("--table", help="Name of a table in the dataframe")


def extract_samples(table, num_samples, idx):
    """Return the intensities of one sample, indexed by an ``mz_rt`` key.

    Args:
        table: DataFrame whose first four columns are the default columns
            (one of them named ``mz``), followed by ``num_samples`` intensity
            columns and then ``num_samples`` retention-time columns.
        num_samples: Number of samples present in ``table``.
        idx: Zero-based position of the sample to extract.

    Returns:
        DataFrame indexed by ``mz_rt`` (``"<mz>_<sample rt>"``) with a single
        column named after the sample. The sample name is the part of the
        intensity column name that follows the first ``.``.
    """
    columns = table.columns.tolist()
    intensity_idx = NUM_DEFAULT_COLUMNS + idx
    rt_idx = NUM_DEFAULT_COLUMNS + num_samples + idx

    # Rows without a retention time for this sample are removed.
    # NOTE(review): this drops the rows from the caller's table in place, so
    # rows removed for one sample are also gone for every sample extracted
    # afterwards from the same table. Kept as-is to preserve the current
    # output -- confirm whether this cumulative filtering is intended.
    table.dropna(subset=[columns[rt_idx]], inplace=True)

    sample_name = columns[intensity_idx].split('.')[1]
    mzrt = table['mz'].map(str) + '_' + table.iloc[:, rt_idx].map(str)
    mzrt_intensity = pd.DataFrame(
        {'mz_rt': mzrt, sample_name: table.iloc[:, intensity_idx]},
        columns=['mz_rt', sample_name],
    )
    return mzrt_intensity.set_index('mz_rt')


def join_samples(table):
    """Outer-join the per-sample intensity frames on their ``mz_rt`` key.

    Args:
        table: DataFrame in the layout described in ``extract_samples``.

    Returns:
        DataFrame with an ``mz_rt`` column and one intensity column per
        sample; a feature missing from a sample is NaN in that column.
    """
    num_samples = (len(table.columns) - NUM_DEFAULT_COLUMNS) // 2
    RamClustr_data = pd.DataFrame(columns=['mz_rt'])
    for sample in range(num_samples):
        # Merge on an explicit column on both sides rather than mixing a
        # left column with a right index level.
        sample_data = extract_samples(table, num_samples, sample).reset_index()
        RamClustr_data = pd.merge(RamClustr_data, sample_data, on='mz_rt', how='outer')
    return RamClustr_data


def convert_to_RamClustR(RamClustr_data):
    """Reshape joined sample data into one row per sample.

    Args:
        RamClustr_data: DataFrame with an ``mz_rt`` column and one intensity
            column per sample, as returned by ``join_samples``.

    Returns:
        Transposed DataFrame: rows are samples (index named ``sample``),
        columns are the ``mz_rt`` keys, and missing intensities are 0.
        The input frame is not modified.
    """
    features = RamClustr_data.rename(columns={'mz_rt': 'sample'}).set_index('sample')
    # Fill after the key column has become the index so that only intensity
    # values are touched.
    features = features.fillna(0)
    RamClustr_data_transposed = features.transpose()
    RamClustr_data_transposed.index.name = 'sample'
    return RamClustr_data_transposed


def main():
    """Parse the command line, convert the selected table and write the CSV.

    Exits with a usage error when an option or the output path is missing,
    and with an error message when the requested table is not in the file.
    """
    options, args = parser.parse_args()
    if not options.dataframe or not options.table:
        parser.error("Both --dataframe and --table are required")
    if not args:
        parser.error("Missing output file path")

    try:
        aplcms_table = pd.read_hdf(options.dataframe, options.table)
    except KeyError:
        sys.exit("Selected table does not exist in HDF dataframe")

    joined_samples = join_samples(aplcms_table)
    RamClustr_data = convert_to_RamClustR(joined_samples)
    output = args[0]
    RamClustr_data.to_csv(output, sep=';')
    print("Table '{}' of HDF dataset is converted to csv for RamClustR".format(options.table))


if __name__ == "__main__":
    main()
<!-- hdf_converter.xml: Galaxy tool wrapper that runs hdf_converter.py on an
     apLCMS HDF dataset and produces a CSV for RamClustR. -->
<tool id="aplcms_to_ramclustr_converter" name="apLCMS to RamClustR converter" version="0.0.1" python_template_version="3.5">
    <description>converts aplcms HDF output to RamClustR csv input</description>
    <requirements>
        <requirement type="package" version="3.7">python</requirement>
        <requirement type="package" version="1.1.5">pandas</requirement>
        <requirement type="package" version="3.6.1">pytables</requirement>
    </requirements>
    <!-- The script path is quoted so that a tool directory containing
         spaces or shell metacharacters is passed as a single argument. -->
    <command detect_errors="exit_code"><![CDATA[
        python '$__tool_directory__/hdf_converter.py' --dataframe '${hdf_dataframe}' --table '${table_name}' '${out_file}'
    ]]></command>
    <inputs>
        <param name="hdf_dataframe" type="data" format="h5" label="apLCMS Dataset"/>
        <!-- The selected value is passed to the script as the name of the
             table to read from the HDF file. -->
        <param name="table_name" type="select" label="Table Name">
            <option value="peaks">Peaks</option>
            <option value="aligned_peaks">Aligned Peaks</option>
        </param>
    </inputs>
    <outputs>
        <data format="csv" name="out_file" label="${hdf_dataframe.name}"/>
    </outputs>
    <tests>
        <test>
            <param name="hdf_dataframe" value="test.h5" ftype="h5"/>
            <param name="table_name" value="peaks"/>
            <output name="out_file" file="test.csv" ftype="csv"/>
        </test>
    </tests>
    <help><![CDATA[
Tool to convert apLCMS output to a csv file formatted in accordance with `RamClustR (CSV) <https://umsa.cerit-sc.cz/root?tool_id=testtoolshed.g2.bx.psu.edu/repos/recetox/ramclustr_csv/ramclustr_csv/1.1.0+galaxy0>`_ input requirements.

**Workflow position**

- Upstream tool: `apLCMS <https://github.com/RECETOX/galaxytools/tree/master/tools/aplcms>`_ Unsupervised or Hybrid

- Downstream tool: `RamClustR (CSV) <https://github.com/RECETOX/galaxytools/tree/master/tools/ramclustr>`_
    ]]></help>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.csv Tue Dec 15 17:38:07 2020 +0000 @@ -0,0 +1,4 @@ +sample;150.02661514282227_938.8019476417302;153.06580422141334_611.1752017147865;155.10737228393555_480.27547500000003;156.0813980102539_586.5000484855757;156.10260772705078_334.4977602397909;156.1390609741211_315.113886;157.12274475097655_368.9345031723409;160.07630920410156_439.365564;161.0601806640625_720.8310488251027;161.09627990722657_637.2365443088119;162.05557686941964_326.61288;163.03892135620117_272.87575257492387;163.0759859085083_280.0550372433282;164.0578155517578_681.4488703363858;165.05508422851562_283.91679595517866;166.07296316964286_721.1545905246769;169.12272135416666_479.84372700000006;175.14806256975447_1206.6737168756927;177.05474853515625_721.7283850030157;183.10235977172852_446.8473567295235;183.11315409342447_304.93846199999996;185.0915298461914_922.1733311642688;189.12728881835938_412.7624238197375;191.14295959472656_276.53056766721755;192.06651306152344_687.4957800000001;200.03367614746094_644.9233828344159;201.16358947753906_1008.0277402269971;203.17935180664062_1203.9995505627119;206.08199310302734_347.741922;207.1379638671875_270.25453488701595;213.9981231689453_808.8008892280452;224.0624237060547_599.5684353524813;225.11266479492187_353.8430531636007;239.00184631347656_681.1334622331717;251.04651641845703_929.1639031825723;283.2645233154297_1183.626240302571;286.9729309082031_676.9087047672;296.955810546875_1672.1757962593351;305.9568634033203_439.30924460300815;328.8801015218099_623.2023823355855;329.87464396158856_606.281769194836;332.9045715332031_910.0168644755723;371.88124084472656_787.2936540834307;447.3464101155599_1764.2850911916823;150.02661514282227_796.7172093164486;153.06580422141334_158.9254765392634;155.10737228393555_466.9844840287102;156.0813980102539_312.96899817700023;156.10260772705078_488.4615852917709;156.1390609741211_167.8595220214028;157.12274475097655_523.3498289613578;160.07630920410156_247.298
74202349993;161.0601806640625_441.95758427459737;161.09627990722657_580.4881323409026;162.05557686941964_440.96340441291943;163.03892135620117_796.8430963792401;163.0759859085083_379.39353990628945;164.0578155517578_128.25268663387976;165.05508422851562_252.9986533545999;166.07296316964286_131.84881902646762;169.12272135416666_546.1283426712628;175.14806256975447_493.43436904006603;177.05474853515625_337.44044568974095;183.10235977172852_570.9405063835309;183.11315409342447_187.7237516688416;185.0915298461914_162.84496935948195;189.12728881835938_353.33259296539563;191.14295959472656_339.6349832928116;192.06651306152344_258.67192859042024;200.03367614746094_776.585197615002;201.16358947753906_682.6839027995527;203.17935180664062_758.5920102112317;206.08199310302734_428.5283583259357;207.1379638671875_421.096548734266;213.9981231689453_546.7571546239999;224.0624237060547_694.9922916325881;225.11266479492187_488.59262047186957;239.00184631347656_427.36106300049994;251.04651641845703_871.1730356038357;283.2645233154297_910.0384815236088;371.88124084472656_1034.5039556287943;447.3464101155599_1025.2508923259247;150.02661514282227_845.7333832586315;153.06580422141334_510.3243816185773;155.10737228393555_550.8905848150982;156.0813980102539_314.05532128033656;156.10260772705078_385.99200106385166;156.1390609741211_472.81046156483137;157.12274475097655_505.83486048106073;160.07630920410156_339.5631901594708;161.0601806640625_442.20327018732064;161.09627990722657_410.89648372190464;162.05557686941964_795.6713818439728;163.03892135620117_770.9278729695135;163.0759859085083_493.43556187837976;164.0578155517578_157.19920354121595;165.05508422851562_165.16140594533752;169.12272135416666_639.1452613522627;177.05474853515625_610.8213811287762;183.10235977172852_328.9023013070317;183.11315409342447_268.4740144360445;185.0915298461914_928.0227080713579;189.12728881835938_492.99624785933946;191.14295959472656_296.259279456;192.06651306152344_542.8763980030429;200.03367614746094_1021.
2380862887392;201.16358947753906_704.528780655999;203.17935180664062_815.169030127998;206.08199310302734_271.78564092508685;207.1379638671875_503.2693560859386;213.9981231689453_181.36949469351194;224.0624237060547_485.20504026533274;225.11266479492187_334.0855593942741;239.00184631347656_403.60010492733846;283.2645233154297_599.2879510569135;371.88124084472656_1241.162278923131 +dataset_10521;1215392.869616097;975234.3681913143;3145.758950502032;871892.09463558;262273.9939250465;766234.7892128396;334247.95281354076;231500.2353033095;294860.59298909595;2395651.280663671;849661.108994921;80466.7756231959;72428.14805623311;127077.83747100987;55520.84827070644;4127094.4292257214;59640.23751499624;7652185.262984221;144910.735221952;166897.17587020385;87732.66018536678;3023552.5915814983;88694.0598482846;244298.21820218943;141422.37291731313;674902.3998699638;2547096.5969816875;3588037.106536677;209.7133298382118;61377.72462865346;1191751.7508739545;1264638.5321834516;770309.8229833202;1598879.0991255937;164774.3060948712;164768.8613643962;91979.90318477267;7431113.623487963;1276060.9962216804;2011088.4677158804;4971687.988498637;148538.0895845904;9021188.2259561;105084398.97224118;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0 
+dataset_10522;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;32150185.185350575;9834021881.849878;177588342.0332911;179614.3282502566;96745281.9342085;469604369.7722277;95033630.35721388;77316584.93981199;134675681.75331393;190181701.54387808;615101513.1008483;190020488.55910504;251928218.86627144;10936518.250125624;252585494.2689283;646510595.5052491;73359543.58753827;214978764.8466887;322203544.90340024;72436506.52413756;1445418813.6531928;88336503.05946895;60811063.919428736;158241168.89263293;905586059.7025735;4124.147416447633;106544642.38697338;44096828.71725818;134415400.67590773;449404430.6521204;28811.493537419;67477829.31875584;488234885.1267421;1866841.9576029498;32215523.215176005;47668221.6651677;1462947.1856949513;24522170.015276425;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0 +dataset_10523;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;1296621.7555035395;37736934.45890023;66198164.82704608;10565142.324429516;6122345.629883338;2119013.70945859;78747838.96738881;56570282.5892835;13354705.541505456;7726736.478018326;29240464.09306413;39581631.68813634;20890995.43676758;193261170.73742604;1750590407.3483374;46344568.06367108;25595900.904584527;359367773.3817498;61477574.7010956;5068744.321105652;6276713.234994051;18941.99647148264;403373060.1459068;18594158.257272538;149740.5548094683;15454.613880087607;209499183.3478378;18309361.597434632;437040854.082904;360654345.7228254;53623121.16868094;104058664.95421386;507817403.6759834;1325572.5215508991