comparison xmsannotator_advanced.xml @ 5:47185b5abe9e draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/xmsannotator commit 339f3b1b1243f353dd3ed115ac66320fdd2d2b68"
author recetox
date Mon, 14 Dec 2020 03:33:44 +0000
parents 6b2b2b83b46c
children fff8d90068cd
comparison
equal deleted inserted replaced
4:6b2b2b83b46c 5:47185b5abe9e
1 <tool id="xmsannotator_advanced" name="xMSannotator (advanced)" version="@TOOL_VERSION@+galaxy0"> 1 <tool id="xmsannotator_advanced" name="xMSannotator (advanced)" version="@TOOL_VERSION@+galaxy2">
2 <macros> 2 <macros>
3 <import>xmsannotator_macros.xml</import> 3 <import>xmsannotator_macros.xml</import>
4 </macros> 4 </macros>
5 5
6 <requirements> 6 <requirements>
7 <container type="docker">@DOCKER_IMAGE@</container> 7 <container type="docker">@DOCKER_IMAGE@</container>
8 </requirements> 8 </requirements>
9 9
10 <command detect_errors="aggressive"><![CDATA[ 10 <command detect_errors="aggressive"><![CDATA[
11 #set expected_adducts = ['"{}"'.format($i.adduct) for $i in $scoring.expected_adducts] 11 Rscript $wrapper
12 #set expected_adducts = "c(" + ', '.join($expected_adducts) + ")"
13
14 Rscript -e 'annotation <- xmsannotator::advanced_annotation(
15 peaks = xmsannotator::load_hdf("$peaks", "peaks"),
16 adducts = xmsannotator::load_hdf("$adducts", "adducts"),
17 compounds = xmsannotator::load_hdf("$compounds", "compounds"),
18
19 mass_tolerance = 1e-6 * $mz_tolerance_ppm,
20 time_tolerance = $rt_tolerance,
21
22 correlation_threshold = as.double($clustering.correlation_threshold),
23 min_cluster_size = as.integer($clustering.min_cluster_size),
24 deep_split = as.integer($clustering.deep_split),
25 network_type = "$clustering.network_type",
26
27 expected_adducts = as.character($expected_adducts),
28 #if $scoring.boost
29 boost = xmsannotator::load_hdf("$scoring.boost", "boost"),
30 #end if
31 redundancy_filtering = $scoring.redundancy_filtering
32 )'
33 -e 'xmsannotator::save_hdf("$annotation", "annotation", annotation)'
34 ]]></command> 12 ]]></command>
35 13
14 <configfiles>
15 <configfile name="wrapper"><![CDATA[
16 library(xmsannotator)
17
18 annotation <- advanced_annotation(
19 #if $peak_table.is_of_type("h5")
20 peak_table = load_peak_table_hdf("${peak_table}"),
21 #elif $peak_table.is_of_type("parquet")
22 peak_table = load_peak_table_parquet("${peak_table}"),
23 #end if
24 adduct_table = load_adduct_table_parquet("${adduct_table}"),
25 compound_table = load_compound_table_parquet("${compound_table}"),
26 mass_tolerance = 1e-6 * ${mass_tolerance_ppm},
27 time_tolerance = $time_tolerance,
28 correlation_threshold = as.double($clustering.correlation_threshold),
29 min_cluster_size = as.integer($clustering.min_cluster_size),
30 deep_split = as.integer($clustering.deep_split),
31 network_type = "$clustering.network_type",
32 #if $scoring.expected_adducts
33 expected_adducts = load_expected_adducts_csv("${$scoring.expected_adducts_csv}"),
34 #end if
35 #if $scoring.boost_compounds
36 boost_compounds = load_boost_compounds_csv("${scoring.boost_compounds_csv}"),
37 #end if
38 redundancy_filtering = $scoring.redundancy_filtering,
39 n_workers = \${GALAXY_SLOTS:-1}
40 )
41
42 save_parquet(data = annotation, file = "${annotation_parquet}")
43 ]]></configfile>
44 </configfiles>
45
36 <inputs> 46 <inputs>
37 <expand macro="annotation"/> 47 <expand macro="inputs"/>
38 <param name="rt_tolerance" type="float" value="10" min="0" label="Retention time tolerance [s]" help="Retention time tolerance in seconds for finding peaks derived from the same parent metabolite."/> 48 <expand macro="tolerance">
39 49 <param name="time_tolerance" type="float" value="10" min="0">
50 <label>Retention time tolerance [s]</label>
51 <help>
52 Retention time tolerance in seconds for finding peaks derived from the same parent metabolite.
53 </help>
54 </param>
55 </expand>
40 <section name="clustering" title="Clustering"> 56 <section name="clustering" title="Clustering">
41 <param name="correlation_method" type="select" display="radio" label="Correlation method"> 57 <param name="correlation_method" type="select" display="radio" label="Correlation method">
42 <option value="pearson" selected="true"/> 58 <option value="pearson" selected="true"/>
43 <option value="spearman"/> 59 <option value="spearman"/>
44 </param> 60 </param>
45 <param name="correlation_threshold" type="float" value="0.7" label="Correlation threshold" help="Correlation threshold between peaks to qualify as adducts/isotopes of the same metabolite."/> 61 <param name="correlation_threshold" type="float" value="0.7">
46 <param name="min_cluster_size" type="integer" value="10" min="1" label="Minimum cluster size" help="The minimum number of nodes to be considered as a cluster."/> 62 <label>Correlation threshold</label>
47 <param name="deep_split" type="integer" value="2" min="0" max="4" label="Deep split" help="Deep split provides a rough control over sensitivity to cluster splitting. The higher the value, the more and smaller clusters will be produced (see WGCNA package documentation)."/> 63 <help>Correlation threshold between peaks to qualify as adducts/isotopes of the same metabolite.</help>
48 <param name="network_type" type="select" display="radio" label="Network type" help="Network type parameter affects how the network's adjacency matrix is created from the correlation matrix (see WGCNA package documentation)."> 64 </param>
65 <param name="min_cluster_size" type="integer" value="10" min="1">
66 <label>Minimum cluster size</label>
67 <help>The minimum number of nodes to be considered as a cluster.</help>
68 </param>
69 <param name="deep_split" type="integer" value="2" min="0" max="4">
70 <label>Deep split</label>
71 <help>
72 Deep split provides a rough control over sensitivity to cluster splitting. The higher the value,
73 the more and smaller clusters will be produced (see WGCNA package documentation).
74 </help>
75 </param>
76 <param name="network_type" type="select" display="radio">
77 <label>Network type</label>
78 <help>
79 Network type parameter affects how the network's adjacency matrix is created from the correlation
80 matrix (see WGCNA package documentation).
81 </help>
49 <option value="signed"/> 82 <option value="signed"/>
50 <option value="unsigned" selected="true"/> 83 <option value="unsigned" selected="true"/>
51 </param> 84 </param>
52 </section> 85 </section>
53
54 <section name="scoring" title="Scoring" expanded="true"> 86 <section name="scoring" title="Scoring" expanded="true">
55 <param name="strict_boosting" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Strict boosting" help="Boost the scores of metabolites that not only belongs to the same pathway but also to the same cluster. Otherwise, do not account for cluster membership."/> 87 <param name="strict_boosting" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE">
56 <repeat name="expected_adducts" title="Expected adducts" help="Require the presence of certain adducts for a high confidence match."> 88 <label>Strict boosting</label>
57 <param name="adduct" type="text" value="M+H" label="Adduct"/> 89 <help>
58 </repeat> 90 Boost the scores of metabolites that belong not only to the same pathway but also to the same
59 <param name="boost" type="data" format="h5" optional="true" label="Validated compounds score boosting (optional)" help="Table of previously validated compounds to boost their scores and confidence levels. The 1st column of the table must contain IDs of compounds. The optional 2nd and 3rd columns may contain mz values and retention times."/> 91 cluster. Otherwise, do not account for cluster membership.
60 <param name="min_isp" type="integer" min="0" value="1" label="Minimum number of expected isotopes" help="Minimum number of adducts/isotopes to be present for a match to be considered as a high confidence match."/> 92 </help>
61 <param name="max_isp" type="integer" min="0" value="5" label="Maximum number of expected isotopes" help="Maximum number of adducts/isotopes to be present for a match to be considered as a high confidence match."/> 93 </param>
62 <param name="redundancy_filtering" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Redundancy filtering" help="Whether to filter out low-scored multiple matcher or not."/> 94 <param name="expected_adducts" type="data" format="csv,tsv" optional="true">
95 <label>Expected adducts</label>
96 <help>
97 Require the presence of certain adducts for a high confidence match. By default, at least the
98 presence of an M+H adduct is required for a high confidence match.
99 </help>
100 <conversion name="expected_adducts_csv" type="csv"/>
101 </param>
102 <param name="boost_compounds" type="data" format="csv,tsv" optional="true">
103 <label>Validated compounds score boosting (optional)</label>
104 <help>
105 Table of previously validated compounds to boost their scores and confidence levels.
106 The 1st column of the table must contain IDs of compounds.
107 The optional 2nd and 3rd columns may contain mz values and retention times.
108 </help>
109 <conversion name="boost_compounds_csv" type="csv"/>
110 </param>
111 <param name="min_isp" type="integer" min="0" value="1">
112 <label>Minimum number of expected isotopes</label>
113 <help>
114 Minimum number of adducts/isotopes to be present for a match to be considered as a high confidence match.
115 </help>
116 </param>
117 <param name="max_isp" type="integer" min="0" value="5">
118 <label>Maximum number of expected isotopes</label>
119 <help>
120 Maximum number of adducts/isotopes to be present for a match to be considered as a high confidence match.
121 </help>
122 </param>
123 <param name="redundancy_filtering" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE">
124 <label>Redundancy filtering</label>
125 <help>Whether to filter out low-scored multiple matches or not.</help>
126 </param>
63 </section> 127 </section>
64 </inputs> 128 </inputs>
65 129
66 <outputs> 130 <outputs>
67 <data format="h5" name="annotation"/> 131 <expand macro="outputs"/>
68 </outputs> 132 </outputs>
69 133
70 <help><![CDATA[ 134 <help><![CDATA[
71 Annotate the peak intensity table (such as outputted from apLCMS) with metabolites from the metabolite database using advanced methods. 135 Annotate the peak intensity table (such as outputted from apLCMS) with compounds from the compounds database
136 using advanced methods.
72 137
73 The annotation process generates all possible metabolite-adduct pairs from the metabolite and adduct databases and matches those pairs to the measured peaks. 138 The annotation process generates all possible compound-adduct pairs and matches those pairs to the measured
74 A metabolite-adduct pair is pronounced as a match to the peak when the difference of their masses is within some tolerance. 139 peaks. A compound-adduct pair is pronounced as a match to a certain peak when the difference of their masses is
75 Then a score and a confidence level are assigned to each match based on peak correlation clustering, metabolite pathway associations, adducts expectations, and isotope conformations. 140 within some tolerance. Then, a score and a confidence level are assigned to each match based on peak correlation
141 clustering, metabolite pathway associations, adducts expectations, and isotope conformations.
76 ]]></help> 142 ]]></help>
77 143
78 <expand macro="citations"/> 144 <citations>
145 <expand macro="citations"/>
146 </citations>
79 </tool> 147 </tool>