Mercurial > repos > recetox > october_recetox_xmsannotator_advanced
changeset 0:e2e2e455fcfb draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox-xmsannotator commit c60206d37ba8fef418ba2a2bfcb4d3b8a70ab8c3"
author | recetox |
---|---|
date | Fri, 29 Oct 2021 09:49:13 +0000 |
parents | |
children | 8fcf5ca25f5d |
files | macros.xml october_recetox_xmsannotator_advanced.xml |
diffstat | 2 files changed, 237 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Oct 29 09:49:13 2021 +0000
@@ -0,0 +1,73 @@
+<macros>
+    <!-- Shared macros for the RECETOX xMSannotator tool wrapper. -->
+    <!-- Version token substituted wherever @TOOL_VERSION@ appears. -->
+    <token name="@TOOL_VERSION@">0.9.0</token>
+    <!-- Docker container the tool is executed in. -->
+    <xml name="requirements">
+        <requirements>
+            <container type="docker">recetox/recetox-xmsannotator:october4</container>
+        </requirements>
+    </xml>
+    <!-- Tool creator metadata. -->
+    <xml name="creator">
+        <creator>
+            <organization
+                url="https://www.recetox.muni.cz/"
+                name="RECETOX MUNI" />
+        </creator>
+    </xml>
+    <!-- Input datasets: peak table, compound database and an optional adduct database. -->
+    <xml name="inputs">
+        <param name="peak_table" type="data" format="csv,parquet">
+            <label>Peak table</label>
+            <help><![CDATA[
+                A peak-intensity table such as the output of apLCMS.
+                The file is required to contain the fields <em>mz</em> and <em>rt</em>.
+                Columns for feature intensity in a sample have to start with <em>intensity</em>.
+
+            ]]></help>
+        </param>
+        <param name="compound_table" type="data" format="csv,parquet">
+            <label>Compound database</label>
+            <help><![CDATA[
+                Database of compounds according to which the annotation is performed.
+                The database is required to contain the fields <em>compound_id</em>, <em>monoisotopic_mass</em>, and <em>molecular_formula</em>.
+            ]]></help>
+        </param>
+        <param name="adduct_table" type="data" format="csv,parquet" optional="true">
+            <label>Adduct database (optional)</label>
+            <help><![CDATA[
+                Database of adducts which is combined with the database of compounds to form molecule-adduct pairs.
+                The database is required to contain <em>adduct</em>, <em>charge</em>, <em>mass</em>, and <em>n_molecules</em>.
+            ]]></help>
+        </param>
+    </xml>
+
+    <!-- Single output dataset holding the annotation result in parquet format. -->
+    <xml name="outputs">
+        <data name="annotation_parquet" format="parquet"/>
+    </xml>
+
+    <!-- Mass tolerance parameter; the yield tag is replaced by the children of the expanding tag. -->
+    <xml name="tolerance">
+        <param name="mass_tolerance_ppm" type="integer" min="0" value="5">
+            <label>Mass tolerance [ppm]</label>
+            <help>Mass tolerance in ppm for database matching.</help>
+        </param>
+        <yield/>
+    </xml>
+    <!-- Common help text substituted wherever @HELP@ appears. -->
+    <token name="@HELP@">
+        <![CDATA[
+        Annotate the peak intensity table (e.g. from an apLCMS run) with compounds from the compounds database
+        using advanced methods.
+
+        The annotation process generates all possible compound-adduct pairs and matches those pairs to the measured
+        peaks. A compound-adduct pair is declared a match to a certain peak when the difference of their masses is
+        within some tolerance.
+        ]]>
+    </token>
+    <xml name="citations">
+        <citation type="doi">10.1021/acs.analchem.6b01214</citation>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/october_recetox_xmsannotator_advanced.xml Fri Oct 29 09:49:13 2021 +0000
@@ -0,0 +1,182 @@
+<tool id="october_recetox_xmsannotator_advanced" name="OCTOBER RECETOX xMSannotator advanced" version="@TOOL_VERSION@+galaxy0">
+    <description>annotate peak intensity table including scores and confidence levels</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="creator"/>
+    <expand macro="requirements" />
+    <!-- Rscript first defines n_workers from GALAXY_SLOTS (default 1), then sources the generated wrapper script. -->
+    <command detect_errors="aggressive"><![CDATA[
+        Rscript -e "n_workers <- \${GALAXY_SLOTS:-1}" -e "source('${wrapper}')"
+    ]]></command>
+
+    <configfiles>
+        <!-- Cheetah-templated R script: runs advanced_annotation and saves the result as parquet. -->
+        <configfile name="wrapper"><![CDATA[
+            library(recetox.xmsannotator)
+
+            annotation <- advanced_annotation(
+                ## NOTE(review): a csv peak table emits no peak_table argument at all; confirm whether csv should be supported here.
+                #if $peak_table.is_of_type("parquet")
+                peak_table = load_peak_table_parquet("${peak_table}"),
+                #end if
+                ## The adduct table input is optional; assumes advanced_annotation has a default adduct table when it is omitted (TODO confirm).
+                #if $adduct_table
+                adduct_table = load_adduct_table_parquet("${adduct_table}"),
+                #end if
+                compound_table = load_compound_table_parquet("${compound_table}"),
+                mass_tolerance = 1e-6 * ${mass_tolerance_ppm},
+                time_tolerance = $time_tolerance,
+                correlation_threshold = as.double($clustering.correlation_threshold),
+                min_cluster_size = as.integer($clustering.min_cluster_size),
+                deep_split = as.integer($clustering.deep_split),
+                network_type = "$clustering.network_type",
+                ## #if $scoring.expected_adducts
+                ## expected_adducts = load_expected_adducts_csv("${scoring.expected_adducts}"),
+                ## #end if
+                ## #if $scoring.boost_compounds
+                ## boost_compounds = load_boost_compounds_csv("${scoring.boost_compounds}"),
+                ## #end if
+                redundancy_filtering = $scoring.redundancy_filtering,
+                n_workers = n_workers,
+                ## new params
+                intensity_deviation_tolerance = as.double($intensity_deviation_tolerance),
+                mass_defect_tolerance = as.double($mass_defect_tolerance),
+                mass_defect_precision = as.double($mass_defect_precision),
+                peak_rt_width = as.integer($peak_rt_width),
+                maximum_isotopes = as.integer($maximum_isotopes),
+                min_ions_per_chemical = as.integer($min_ions_per_chemical),
+                filter_by = "$filter_by"
+            )
+
+            save_parquet(data = annotation, file = "${annotation_parquet}")
+        ]]></configfile>
+    </configfiles>
+
+    <inputs>
+        <expand macro="inputs"/>
+        <expand macro="tolerance">
+            <param name="time_tolerance" type="float" value="10" min="0">
+                <label>Retention time tolerance [s]</label>
+                <help>
+                    Retention time tolerance in seconds for finding peaks derived from the same parent compound.
+
+                </help>
+            </param>
+        </expand>
+        <section name="clustering" title="Clustering">
+            <!-- NOTE(review): correlation_method is not referenced by the wrapper script above. -->
+            <param name="correlation_method" type="select" display="radio" label="Correlation method">
+                <option value="pearson" selected="true"/>
+                <option value="spearman"/>
+            </param>
+            <param name="correlation_threshold" type="float" value="0.7">
+                <label>Correlation threshold</label>
+                <help>Correlation threshold between peaks to qualify as adducts/isotopes of the same metabolite.</help>
+            </param>
+            <param name="min_cluster_size" type="integer" value="10" min="1">
+                <label>Minimum cluster size</label>
+                <help>The minimum number of nodes to be considered as a cluster.</help>
+            </param>
+            <param name="deep_split" type="integer" value="2" min="0" max="4">
+                <label>Deep split</label>
+                <help>
+                    Deep split provides a rough control over sensitivity to cluster splitting. The higher the value,
+                    the more and smaller clusters will be produced (see WGCNA package documentation).
+                </help>
+            </param>
+            <param name="network_type" type="select" display="radio">
+                <label>Network type</label>
+                <help>
+                    Network type parameter affects how the network's adjacency matrix is created from the correlation
+                    matrix (see WGCNA package documentation).
+                </help>
+                <option value="signed"/>
+                <option value="unsigned" selected="true"/>
+            </param>
+        </section>
+        <section name="scoring" title="Scoring" expanded="true">
+            <!-- NOTE(review): strict_boosting, min_isp and max_isp are not referenced by the wrapper script above. -->
+            <param name="strict_boosting" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE">
+                <label>Strict boosting</label>
+                <help>
+                    Boost the scores of metabolites that not only belong to the same pathway but also to the same
+                    cluster. Otherwise, do not account for cluster membership.
+                </help>
+            </param>
+            <!-- <param name="expected_adducts" type="data" format="csv" optional="true">
+                <label>Expected adducts (optional)</label>
+
+                <help>
+                    Require the presence of certain adducts for a high confidence match. By default, at least the
+                    presence of an M+H adduct is required for a high confidence match.
+                </help>
+            </param> -->
+            <!-- <param name="boost_compounds" type="data" format="csv" optional="true">
+                <label>Validated compounds score boosting (optional)</label>
+                <help>
+                    Table of previously validated compounds to boost their scores and confidence levels.
+                    The 1st column of the table must contain IDs of compounds.
+                    The optional 2nd and 3rd columns may contain mz values and retention times.
+                </help>
+            </param> -->
+            <param name="min_isp" type="integer" min="0" value="1">
+                <label>Minimum number of expected isotopes</label>
+                <help>
+                    Minimum number of adducts/isotopes to be present for a match to be considered as a high confidence match.
+                </help>
+            </param>
+            <param name="max_isp" type="integer" min="0" value="5">
+                <label>Maximum number of expected isotopes</label>
+                <help>
+                    Maximum number of adducts/isotopes to be present for a match to be considered as a high confidence match.
+                </help>
+            </param>
+            <param name="redundancy_filtering" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE">
+                <label>Redundancy filtering</label>
+                <help>Whether to filter out low-scored multiple matches or not.</help>
+            </param>
+        </section>
+        <param name="intensity_deviation_tolerance" type="float" value="0.1">
+            <label>intensity_deviation_tolerance</label>
+        </param>
+        <param name="mass_defect_tolerance" type="float" value="0.1">
+            <label>mass_defect_tolerance</label>
+        </param>
+        <param name="mass_defect_precision" type="float" value="0.01">
+            <label>mass_defect_precision</label>
+        </param>
+        <param name="peak_rt_width" type="integer" value="1">
+            <label>peak_rt_width</label>
+        </param>
+        <param name="maximum_isotopes" type="integer" value="10">
+            <label>maximum_isotopes</label>
+        </param>
+        <param name="min_ions_per_chemical" type="integer" value="2">
+            <label>min_ions_per_chemical</label>
+        </param>
+        <param name="filter_by" type="text" value="c('M-H', 'M+H')">
+            <!-- turn sanitizer off for prototype, TODO refactor this for production -->
+            <sanitizer sanitize="false"></sanitizer>
+            <!-- The value is placed inside a double-quoted R string in the wrapper, so reject characters that could terminate or escape that string. -->
+            <validator type="regex" message="Double quotes and backslashes are not allowed.">^[^"\\]*$</validator>
+            <label>filter_by</label>
+        </param>
+    </inputs>
+
+    <outputs>
+        <expand macro="outputs"/>
+    </outputs>
+
+    <help>
+        <![CDATA[
+        @HELP@
+        Then, a score and a confidence level are assigned to each match based on peak correlation
+        clustering, metabolite pathway associations, adducts expectations, and isotope conformations.
+        ]]>
+    </help>
+
+    <citations>
+        <expand macro="citations"/>
+    </citations>
+</tool>