Mercurial > repos > recetox > recetox_aplcms_unsupervised
changeset 10:6057540f65a9 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 46f606d8d234807e603b55eb2791f76663b551ee"
author | recetox |
---|---|
date | Thu, 21 Oct 2021 15:03:18 +0000 |
parents | b18c2d014b28 |
children | e0f77e03599e |
files | main.R recetox_aplcms_macros.xml recetox_aplcms_unsupervised.xml test-data/hybrid.recetox.parquet test-data/unsupervised.recetox.parquet |
diffstat | 5 files changed, 51 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- a/main.R Tue Oct 05 13:12:50 2021 +0000 +++ b/main.R Thu Oct 21 15:03:18 2021 +0000 @@ -1,4 +1,5 @@ library(recetox.aplcms) +library(dplyr) save_extracted_features <- function(df, filename) { df <- as.data.frame(df) @@ -6,11 +7,37 @@ arrow::write_parquet(df[columns], filename) } -save_feature_sample_table <- function(df, filename) { +save_aligned_feature_table <- function(df, filename) { columns <- c("feature", "mz", "rt", "sample", "sample_rt", "sample_intensity") arrow::write_parquet(df[columns], filename) } +save_recovered_feature_table <- function(df, filename, out_format) { + columns <- c("feature", "mz", "rt", "sample", "sample_rt", "sample_intensity") + if (out_format == "recetox") { + peak_table <- df[columns] + recetox_peak_table <- rcx_aplcms_to_rcx_xmsannotator(peak_table) + arrow::write_parquet(recetox_peak_table, filename) + } else { + arrow::write_parquet(df[columns], filename) + } +} + +rcx_aplcms_to_rcx_xmsannotator <- function(peak_table) { + col_base <- c("feature", "mz", "rt") + output_table <- peak_table %>% distinct(across(any_of(col_base))) + + for (level in levels(peak_table$sample)) { + subdata <- peak_table %>% + filter(sample == level) %>% + select(any_of(c(col_base, "sample_intensity"))) %>% + rename(!!level := "sample_intensity") + output_table <- inner_join(output_table, subdata, by = col_base) + } + output_table <- output_table %>% rename(peak = feature) + return(output_table) +} + known_table_columns <- function() { c("chemical_formula", "HMDB_ID", "KEGG_compound_ID", "mass", "ion.type", "m.z", "Number_profiles_processed", "Percent_found", "mz_min", "mz_max", @@ -47,7 +74,7 @@ mapply(save_extracted_features, dfs, filenames) } -unsupervised_main <- function(sample_files, aligned_file, recovered_file, ...) { +unsupervised_main <- function(sample_files, aligned_file, recovered_file, out_format, ...) { sample_files <- sort_samples_by_acquisition_number(sample_files) res <- unsupervised(filenames = sample_files, ...) @@ -55,11 +82,11 @@ save_all_extracted_features(res$extracted_features, sample_files) save_all_corrected_features(res$corrected_features, sample_files) - save_feature_sample_table(res$aligned_feature_sample_table, aligned_file) - save_feature_sample_table(res$recovered_feature_sample_table, recovered_file) + save_aligned_feature_table(res$aligned_feature_sample_table, aligned_file) + save_recovered_feature_table(res$recovered_feature_sample_table, recovered_file, out_format) } -hybrid_main <- function(sample_files, known_table_file, updated_known_table_file, pairing_file, aligned_file, recovered_file, ...) { +hybrid_main <- function(sample_files, known_table_file, updated_known_table_file, pairing_file, aligned_file, recovered_file, out_format, ...) { sample_files <- sort_samples_by_acquisition_number(sample_files) known <- read_known_table(known_table_file) @@ -71,6 +98,6 @@ save_all_extracted_features(res$extracted_features, sample_files) save_all_corrected_features(res$corrected_features, sample_files) - save_feature_sample_table(res$aligned_feature_sample_table, aligned_file) - save_feature_sample_table(res$recovered_feature_sample_table, recovered_file) + save_aligned_feature_table(res$aligned_feature_sample_table, aligned_file) + save_recovered_feature_table(res$recovered_feature_sample_table, recovered_file, out_format) }
--- a/recetox_aplcms_macros.xml Tue Oct 05 13:12:50 2021 +0000 +++ b/recetox_aplcms_macros.xml Thu Oct 21 15:03:18 2021 +0000 @@ -5,6 +5,7 @@ <requirement type="package" version="4.1.0">r-base</requirement> <requirement type="package" version="4.0.1">r-arrow</requirement> <requirement type="package" version="0.9.3">r-recetox-aplcms</requirement> + <requirement type="package" version="1.0.7">r-dplyr</requirement> </requirements> </xml> @@ -19,7 +20,7 @@ <param name="files" type="data" format="mzdata,mzml,mzxml,netcdf" multiple="true" min="3" label="data" help="Mass spectrometry files for peak extraction." /> <yield /> - </inputs> + </inputs> </xml> <xml name="history_db"> @@ -132,6 +133,11 @@ help="The minimum number of raw data points to be considered as a true feature." /> </section> </xml> + <xml name="output_format"> + <section name="output_format" title="Output Format"> + <param name="out_format" type="boolean" checked="false" truevalue="recetox" falsevalue="original" label="Use custom RECETOX output format?" /> + </section> + </xml> <xml name="unsupervised_outputs"> <data name="recovered_feature_sample_table" format="parquet" label="${tool.name} recovered_feature_sample_table on ${on_string}" />
--- a/recetox_aplcms_unsupervised.xml Tue Oct 05 13:12:50 2021 +0000 +++ b/recetox_aplcms_unsupervised.xml Thu Oct 21 15:03:18 2021 +0000 @@ -1,4 +1,4 @@ -<tool id="recetox_aplcms_unsupervised" name="RECETOX apLCMS Unsupervised" version="@TOOL_VERSION@+galaxy1"> +<tool id="recetox_aplcms_unsupervised" name="RECETOX apLCMS Unsupervised" version="@TOOL_VERSION@+galaxy2"> <description>generate a feature table from LC/MS spectra</description> <macros> <import>recetox_aplcms_macros.xml</import> @@ -22,6 +22,7 @@ sample_files = c('$filenames_str'), aligned_file = '${aligned_feature_sample_table}', recovered_file = '${recovered_feature_sample_table}', + out_format = '$output_format.out_format', min_exp = $noise_filtering.min_exp, min_pres = $noise_filtering.min_pres, min_run = $noise_filtering.min_run, @@ -55,6 +56,7 @@ <expand macro="feature_detection" /> <expand macro="peak_alignment" /> <expand macro="weak_signal_recovery" /> + <expand macro="output_format" /> </expand> <outputs> @@ -66,6 +68,13 @@ <param name="files" value="mbr_test0.mzml,mbr_test1.mzml,mbr_test2.mzml" ftype="mzml" /> <output name="recovered_feature_sample_table" file="unsupervised_recovered_feature_sample_table.parquet" ftype="parquet"/> </test> + <test> + <param name="files" value="mbr_test0.mzml,mbr_test1.mzml,mbr_test2.mzml" ftype="mzml" /> + <section name="output_format"> + <param name="out_format" value="recetox" /> + </section> + <output name="recovered_feature_sample_table" file="unsupervised.recetox.parquet" ftype="parquet"/> + </test> </tests> <help>