comparison halla.xml @ 0:f6e288442812 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/halla commit 5cd01ad3808dff1ce4aae231706cbe2225079a04
author iuc
date Wed, 05 Nov 2025 09:37:09 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f6e288442812
1 <tool id="halla" name="HAllA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
2 <description>Hierarchical All-against-All association</description>
3 <macros>
4 <token name="@TOOL_VERSION@">0.8.40</token> <!-- halla release; reused in the tool version string and as the package requirement version -->
5 <token name="@VERSION_SUFFIX@">0</token> <!-- wrapper revision, appended after "+galaxy" in the tool version string -->
6 <token name="@PROFILE@">24.0</token> <!-- substituted into the profile attribute of the tool element -->
7 </macros>
8 <xrefs>
9 <xref type="bio.tools"></xref> <!-- NOTE(review): the bio.tools identifier is empty; fill in the registry ID or remove this xref -->
10 </xrefs>
11 <requirements>
12 <requirement type="package" version="@TOOL_VERSION@">halla</requirement> <!-- package version follows the @TOOL_VERSION@ token -->
13 </requirements>
14 <version_command><![CDATA[halla --version]]></version_command> <!-- runs halla with its version flag to record the installed version -->
15 <command detect_errors="exit_code"><![CDATA[
16 halla
17 -x '$x'
18 --x_dataset_label
19 #if $x_dataset_label
20 '$x_dataset_label'
21 #else
22 '$x.element_identifier'
23 #end if
24 -y '$y'
25 --y_dataset_label
26 #if $y_dataset_label
27 '$y_dataset_label'
28 #else
29 '$y.element_identifier'
30 #end if
31 $alla
32 --max_freq_thresh $max_freq_thresh
33 #if $transform_data_funcs
34 --transform_data_funcs
35 #for $foo in $transform_data_funcs
36 $foo
37 #end for
38 #end if
39 $disable_bypass_discretization_if_possible
40 #if $discretize_func
41 --discretize_func $discretize_func
42 #end if
43 #if $discretize_num_bins
44 --discretize_num_bins $discretize_num_bins
45 #end if
46 --pdist_metric $pdist_metric
47 $sim2dist_disable_abs
48 --linkage_method $linkage_method
49 --permute_func $permute_func
50 --permute_iters $permute_iters
51 $disable_permute_speedup
52 --fdr_alpha $fdr_alpha
53 --fdr_method $fdr_method
54 --fnr_thresh $fnr_thresh
55 --rank_cluster $rank_cluster
56 #if str($seed) != ''
57 --seed $seed
58 #end if
59 --block_num $block_num
60 $hallagram
61 $diagnostic_plot
62 -o output
63 --num_threads "\${GALAXY_SLOTS:-4}"
64 ]]></command> <!-- Builds the halla call. Dataset labels fall back to the element identifier of the input when the label field is empty; optional flags are emitted only when set. The seed is tested via its string form so that an explicit seed of 0 is still forwarded (a bare truth test on the parameter would skip it). Results are written to the "output" directory in the job working directory. -->
65 <inputs> <!-- each parameter is referenced in the command template by its argument name without the leading dashes -->
66 <param argument="-x" type="data" format="tabular" label="Dataset X" help="Tabular dataset with p features/rows and n samples/columns" />
67 <param argument="-y" type="data" format="tabular" label="Dataset Y" help="Tabular dataset with d features/rows and n samples/columns" />
68 <param argument="--alla" type="boolean" truevalue="--alla" falsevalue="" checked="false" label="Use AllA instead of HAllA" help="HAllA uses a hierarchical approach for block association discovery on top of an existing all-against-all (AllA) association matrix. Use this option to skip the block association step." />
69 <param argument="--max_freq_thresh" type="float" min="0" max="1" value="1" label="Maximum frequency" help="features with max frequencies >= the threshold will be removed" />
70 <param argument="--transform_data_funcs" type="select" optional="true" multiple="true" label="Continuous data transformation function">
71 <option value="zscore">zscore</option>
72 <option value="rank">rank</option>
73 <option value="quantile">quantile</option>
74 </param>
75
76 <param argument="--disable_bypass_discretization_if_possible" type="boolean" truevalue="--disable_bypass_discretization_if_possible" falsevalue="" checked="false" label="Discretize even if all features are continuous" help="By default discretization is bypassed if all features are continuous" />
77 <param argument="--discretize_func" type="select" optional="true" label="Discretization function">
78 <option value="quantile">quantile</option>
79 <option value="kmeans">kmeans</option>
80 <option value="uniform">uniform</option>
81 <option value="jenks">jenks</option>
82 </param>
83 <param argument="--discretize_num_bins" type="integer" min="0" value="" optional="true" label="Discretization - number of bins"/>
84
85 <param argument="--pdist_metric" type="select" label="Distance/similarity metric" help="default: spearman for continuous data, If there is at least one categorical variable in either dataset, HAllA will shift to Normalized Mutual Information (NMI) as an alternative similarity measure.">
86 <option value="spearman" selected="true">Spearman</option>
87 <option value="pearson">Pearson</option>
88 <option value="dcor">Distance correlation</option>
89 <option value="mi">mutual information</option>
90 <option value="nmi">normalized mutual information</option>
91 <option value="xicor">xi correlation</option>
92 </param>
93
94 <param argument="--sim2dist_disable_abs" type="boolean" truevalue="--sim2dist_disable_abs" falsevalue="" checked="false" label="Hierarchical clustering - disable setting similarity scores as absolute when computing distance" />
95 <param argument="--linkage_method" type="select" label="Hierarchical clustering linkage method" help="see help below">
96 <option value="single">single</option>
97 <option value="complete">complete</option>
98 <option value="average" selected="true">average</option>
99 <option value="weighted">weighted</option>
100 <option value="centroid">centroid</option>
101 <option value="median">median</option>
102 <option value="ward">ward</option>
103 </param>
104
105 <param argument="--permute_func" type="select" label="P-value approximation function" help=" in the p-value permutation test">
106 <option value="gpd">gpd</option>
107 <option value="ecdf">ecdf</option>
108 </param>
109 <param argument="--permute_iters" type="integer" min="0" value="1000" label="Number of iterations in the p-value permutation test"/>
110 <param argument="--disable_permute_speedup" type="boolean" truevalue="--disable_permute_speedup" falsevalue="" checked="false" label="Do not break early in the permutation test if p-value is insignificant"/>
111 <!-- \-\-force_permutations If turned on, force permutation testing -->
112
113 <param argument="--fdr_alpha" type="float" min="0" max="1" value="0.05" label="FDR threshold"/>
114 <param argument="--fdr_method" type="select" label="FDR method" help="see help below">
115 <option value="bonferroni">bonferroni: one-step correction</option>
116 <option value="sidak">sidak: one-step correction</option>
117 <option value="holm-sidak">holm-sidak: step down method using Sidak adjustments</option>
118 <option value="holm">holm: step-down method using Bonferroni adjustments</option>
119 <option value="simes-hochberg">simes-hochberg: step-up method (independent)</option>
120 <option value="hommel">hommel: closed method based on Simes tests (non-negative)</option>
121 <option value="fdr_bh" selected="true">fdr_bh: Benjamini/Hochberg (non-negative)</option>
122 <option value="fdr_by">fdr_by: Benjamini/Yekutieli (negative)</option>
123 <option value="fdr_tsbh">fdr_tsbh: two stage fdr correction (non-negative)</option>
124 <option value="fdr_tsbky">fdr_tsbky: two stage fdr correction (non-negative)</option>
125 </param>
126 <param argument="--fnr_thresh" type="float" min="0" max="1" value="0.05" label="FNR threshold"/>
127 <param argument="--rank_cluster" type="select" label="Procedure to rank cluster using the p-values within the cluster">
128 <option value="best" selected="true">best</option>
129 <option value="average">average</option>
130 </param>
131 <param argument="--seed" type="integer" value="" optional="true" label="Randomization seed" />
132
133 <param argument="--hallagram" type="boolean" truevalue="--hallagram" falsevalue="--no_hallagram" checked="true" label="Generate hallagram" />
134 <param argument="--x_dataset_label" type="text" label="Hallagram/clustermap: label for X dataset" help="By default the dataset identifier is used"/>
135 <param argument="--y_dataset_label" type="text" label="Hallagram/clustermap: label for Y dataset" help="By default the dataset identifier is used"/>
136 <param argument="--block_num" type="integer" min="-1" value="-1" label="Number of top clusters in hallagram" help="-1: show all clusters"/>
137
138 <param argument="--diagnostic_plot" type="boolean" truevalue="--diagnostic_plot" falsevalue="" checked="false" label="Generates diagnostic plot" />
139 </inputs>
140 <outputs>
141 <data name="sig_clusters" format="tabular" from_work_dir="output/sig_clusters.txt" label="${tool.name} on ${on_string}: block associations"/>
142 <data name="all_associations" format="tabular" from_work_dir="output/all_associations.txt" label="${tool.name} on ${on_string}: all associations"/>
143 <data name="hallagram_out" format="pdf" from_work_dir="output/hallagram.pdf" label="${tool.name} on ${on_string}: hallagram">
144 <filter>hallagram is True</filter> <!-- only created when the "Generate hallagram" option is enabled -->
145 </data>
146 <collection name="diagnostic_plot_out" format="pdf" type="list" label="${tool.name} on ${on_string}: diagnostic plots">
147 <discover_datasets pattern="(?P&lt;designation&gt;association_.*)\.pdf" format="pdf" directory="output/diagnostic/" /> <!-- collects association_*.pdf from output/diagnostic/; the file name without the .pdf suffix is the element designation -->
148 <filter>diagnostic_plot is True</filter> <!-- only created when the diagnostic plot option is enabled -->
149 </collection>
150 </outputs>
151 <tests>
152 <test expect_num_outputs="3"> <!-- default options: both tables plus the hallagram, which is enabled by default -->
153 <param name="x" value="X_16_100.txt"/>
154 <param name="y" value="Y_16_100.txt"/>
155 <param name="seed" value="42"/>
156 <output name="sig_clusters" value="sig_clusters.tsv"/>
157 <output name="all_associations" value="all_associations.tsv"/>
158 </test>
159 <test expect_num_outputs="3"> <!-- hallagram off, diagnostic plots on: both tables plus the plot collection -->
160 <param name="x" value="X_16_100.txt"/>
161 <param name="y" value="Y_16_100.txt"/>
162 <param name="seed" value="42"/>
163 <param name="hallagram" value="false"/>
164 <param name="diagnostic_plot" value="true"/>
165 <output name="sig_clusters" value="sig_clusters.tsv"/>
166 <output name="all_associations" value="all_associations.tsv"/>
167 <output_collection name="diagnostic_plot_out" type="list" count="18">
168 <element name="association_1" value="association_1.pdf"/> <!-- only the first of the 18 expected plots is compared against a reference file -->
169 </output_collection>
170 </test>
171 </tests>
172 <help><![CDATA[
173
174 .. class:: infomark
175
176 **What it does**
177
178 HAllA (Hierarchical All-against-All association) is a method for finding blocks of associated features in high-dimensional datasets
179 measured from a common set of samples. HAllA operates by
180
181 1. optionally discretizing mixed continuous and categorical features to a uniform representation,
182 2. hierarchically clustering each dataset separately to generate a pair of data hierarchies,
183 3. performing all-against-all association testing between features across two datasets using robust measures of correlation,
184 4. determining the statistical significance of individual associations by permutation testing, and
185 5. iteratively subdividing the space of significant all-against-all correlations into blocks of densely associated features occurring as clusters in the original datasets.
186
187 Tutorial https://github.com/biobakery/biobakery/wiki/halla
188
189 Usage
190 .....
191
192 **Input**
193
194 Data in scientific studies often come paired in the form of two high-dimensional datasets, where the dataset
195 X (with p features/rows and n samples/columns) are assumed to be p predictor variables (or features) measured
196 on n samples that give rise to d response variables contained in the dataset Y (with d features/rows and n samples/columns).
197 Note that column i of X is sampled jointly with column i of Y, so that X and Y are aligned.
198
199 **Output**
200
201 HAllA reports significant associations between clusters of related features ("block associations").
202 Each block association is characterized by a cluster from the first dataset, a cluster from the second dataset,
203 and measures of statistical significance and effect size (p-value, q-value, and similarity score) for the cluster's
204 component pairwise associations.
205
206 - **block associations** which reports block associations between the two datasets' features
207 - **all associations** which reports the pairwise similarity scores for all features across the two datasets
208 - **hallagram** graphical representation of the discovered block associations
209 - **diagnostic plots** (optional) lattice plot showing the pairwise associations between microbiome features and metadata for each significant cluster.
210
211 **Notes**
212
213 Details on the available options:
214
215 - Hierarchical clustering linkage methods https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html
216 - FDR methods https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html
217
218 ]]></help>
219 <citations>
220 <citation type="doi">10.1093/bioinformatics/btac232</citation> <!-- NOTE(review): presumably the HAllA publication; confirm the DOI resolves to it -->
221 </citations>
222 </tool>