Mercurial > repos > iuc > halla
comparison halla.xml @ 0:f6e288442812 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/halla commit 5cd01ad3808dff1ce4aae231706cbe2225079a04
| author | iuc |
|---|---|
| date | Wed, 05 Nov 2025 09:37:09 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:f6e288442812 |
|---|---|
| 1 <tool id="halla" name="HAllA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT"> | |
| 2 <description>Hierarchical All-against-All association</description> | |
| 3 <macros> | |
| 4 <token name="@TOOL_VERSION@">0.8.40</token> | |
| 5 <token name="@VERSION_SUFFIX@">0</token> | |
| 6 <token name="@PROFILE@">24.0</token> | |
| 7 </macros> | |
| 8 <xrefs> | |
| 9 <xref type="bio.tools"></xref> | |
| 10 </xrefs> | |
| 11 <requirements> | |
| 12 <requirement type="package" version="@TOOL_VERSION@">halla</requirement> | |
| 13 </requirements> | |
| 14 <version_command><![CDATA[halla --version]]></version_command> | |
| 15 <command detect_errors="exit_code"><![CDATA[ | |
| 16 halla | |
| 17 -x '$x' | |
| 18 --x_dataset_label | |
| ## fall back to the dataset identifier when no explicit label was given | |
| 19 #if $x_dataset_label | |
| 20 '$x_dataset_label' | |
| 21 #else | |
| 22 '$x.element_identifier' | |
| 23 #end if | |
| 24 -y '$y' | |
| 25 --y_dataset_label | |
| 26 #if $y_dataset_label | |
| 27 '$y_dataset_label' | |
| 28 #else | |
| 29 '$y.element_identifier' | |
| 30 #end if | |
| 31 $alla | |
| 32 --max_freq_thresh $max_freq_thresh | |
| 33 #if $transform_data_funcs | |
| 34 --transform_data_funcs | |
| 35 #for $foo in $transform_data_funcs | |
| 36 $foo | |
| 37 #end for | |
| 38 #end if | |
| 39 $disable_bypass_discretization_if_possible | |
| 40 #if $discretize_func | |
| 41 --discretize_func $discretize_func | |
| 42 #end if | |
| ## compare the string form: a plain truth test would also drop an explicit 0 | |
| 43 #if str($discretize_num_bins) != '' | |
| 44 --discretize_num_bins $discretize_num_bins | |
| 45 #end if | |
| 46 --pdist_metric $pdist_metric | |
| 47 $sim2dist_disable_abs | |
| 48 --linkage_method $linkage_method | |
| 49 --permute_func $permute_func | |
| 50 --permute_iters $permute_iters | |
| 51 $disable_permute_speedup | |
| 52 --fdr_alpha $fdr_alpha | |
| 53 --fdr_method $fdr_method | |
| 54 --fnr_thresh $fnr_thresh | |
| 55 --rank_cluster $rank_cluster | |
| ## seed 0 is a valid seed; only skip the option when the field is left empty | |
| 56 #if str($seed) != '' | |
| 57 --seed $seed | |
| 58 #end if | |
| 59 --block_num $block_num | |
| 60 $hallagram | |
| 61 $diagnostic_plot | |
| 62 -o output | |
| 63 --num_threads "\${GALAXY_SLOTS:-4}" | |
| 64 ]]></command> | |
| 65 <inputs> | |
| 66 <param argument="-x" type="data" format="tabular" label="Dataset X" help="Tabular dataset with p features/rows and n samples/columns" /> | |
| 67 <param argument="-y" type="data" format="tabular" label="Dataset Y" help="Tabular dataset with d features/rows and n samples/columns" /> | |
| 68 <param argument="--alla" type="boolean" truevalue="--alla" falsevalue="" checked="false" label="Use AllA instead of HAllA" help="HAllA uses a hierarchical approach for block association discovery on top of an existing all-against-all (AllA) association matrix. Use this option to skip the block association step." /> | |
| 69 <param argument="--max_freq_thresh" type="float" min="0" max="1" value="1" label="Maximum frequency" help="Features with max frequencies >= the threshold will be removed" /> | |
| 70 <param argument="--transform_data_funcs" type="select" optional="true" multiple="true" label="Continuous data transformation function"> | |
| 71 <option value="zscore">zscore</option> | |
| 72 <option value="rank">rank</option> | |
| 73 <option value="quantile">quantile</option> | |
| 74 </param> | |
| 75 | |
| 76 <param argument="--disable_bypass_discretization_if_possible" type="boolean" truevalue="--disable_bypass_discretization_if_possible" falsevalue="" checked="false" label="Discretize even if all features are continuous" help="By default discretization is bypassed if all features are continuous" /> | |
| 77 <param argument="--discretize_func" type="select" optional="true" label="Discretization function"> | |
| 78 <option value="quantile">quantile</option> | |
| 79 <option value="kmeans">kmeans</option> | |
| 80 <option value="uniform">uniform</option> | |
| 81 <option value="jenks">jenks</option> | |
| 82 </param> | |
| 83 <param argument="--discretize_num_bins" type="integer" min="0" value="" optional="true" label="Discretization - number of bins"/> | |
| 84 | |
| 85 <param argument="--pdist_metric" type="select" label="Distance/similarity metric" help="default: spearman for continuous data, If there is at least one categorical variable in either dataset, HAllA will shift to Normalized Mutual Information (NMI) as an alternative similarity measure."> | |
| 86 <option value="spearman" selected="true">Spearman</option> | |
| 87 <option value="pearson">Pearson</option> | |
| 88 <option value="dcor">Distance correlation</option> | |
| 89 <option value="mi">mutual information</option> | |
| 90 <option value="nmi">normalized mutual information</option> | |
| 91 <option value="xicor">xi correlation</option> | |
| 92 </param> | |
| 93 | |
| 94 <param argument="--sim2dist_disable_abs" type="boolean" truevalue="--sim2dist_disable_abs" falsevalue="" checked="false" label="Hierarchical clustering - disable setting similarity scores as absolute when computing distance" /> | |
| 95 <param argument="--linkage_method" type="select" label="Hierarchical clustering linkage method" help="see help below"> | |
| 96 <option value="single">single</option> | |
| 97 <option value="complete">complete</option> | |
| 98 <option value="average" selected="true">average</option> | |
| 99 <option value="weighted">weighted</option> | |
| 100 <option value="centroid">centroid</option> | |
| 101 <option value="median">median</option> | |
| 102 <option value="ward">ward</option> | |
| 103 </param> | |
| 104 | |
| 105 <param argument="--permute_func" type="select" label="P-value approximation function" help="Function used to approximate p-values in the permutation test"> | |
| 106 <option value="gpd">gpd</option> | |
| 107 <option value="ecdf">ecdf</option> | |
| 108 </param> | |
| 109 <param argument="--permute_iters" type="integer" min="0" value="1000" label="Number of iterations in the p-value permutation test"/> | |
| 110 <param argument="--disable_permute_speedup" type="boolean" truevalue="--disable_permute_speedup" falsevalue="" checked="false" label="Do not break early in the permutation test if p-value is insignificant"/> | |
| 111 <!-- \-\-force_permutations If turned on, force permutation testing --> | |
| 112 | |
| 113 <param argument="--fdr_alpha" type="float" min="0" max="1" value="0.05" label="FDR threshold"/> | |
| 114 <param argument="--fdr_method" type="select" label="FDR method" help="see help below"> | |
| 115 <option value="bonferroni">bonferroni: one-step correction</option> | |
| 116 <option value="sidak">sidak: one-step correction</option> | |
| 117 <option value="holm-sidak">holm-sidak: </option> | |
| 118 <option value="holm">holm: </option> | |
| 119 <option value="simes-hochberg">simes-hochberg: </option> | |
| 120 <option value="hommel">hommel: </option> | |
| 121 <option value="fdr_bh" selected="true">fdr_bh: Benjamini/Hochberg (non-negative)</option> | |
| 122 <option value="fdr_by">fdr_by: Benjamini/Yekutieli (negative)</option> | |
| 123 <option value="fdr_tsbh">fdr_tsbh: two stage fdr correction (non-negative)</option> | |
| 124 <option value="fdr_tsbky">fdr_tsbky: two stage fdr correction (non-negative)</option> | |
| 125 </param> | |
| 126 <param argument="--fnr_thresh" type="float" min="0" max="1" value="0.05" label="FNR threshold"/> | |
| 127 <param argument="--rank_cluster" type="select" label="Procedure to rank cluster using the p-values within the cluster"> | |
| 128 <option value="best" selected="true">best</option> | |
| 129 <option value="average">average</option> | |
| 130 </param> | |
| 131 <param argument="--seed" type="integer" value="" optional="true" label="Randomization seed" /> | |
| 132 | |
| 133 <param argument="--hallagram" type="boolean" truevalue="--hallagram" falsevalue="--no_hallagram" checked="true" label="Generate hallagram" /> | |
| 134 <param argument="--x_dataset_label" type="text" label="Hallagram/clustermap: label for X dataset" help="By default the dataset identifier is used"/> | |
| 135 <param argument="--y_dataset_label" type="text" label="Hallagram/clustermap: label for Y dataset" help="By default the dataset identifier is used"/> | |
| 136 <param argument="--block_num" type="integer" min="-1" value="-1" label="Number of top clusters in hallagram" help="-1: show all clusters"/> | |
| 137 | |
| 138 <param argument="--diagnostic_plot" type="boolean" truevalue="--diagnostic_plot" falsevalue="" checked="false" label="Generates diagnostic plot" /> | |
| 139 </inputs> | |
| 140 <outputs> | |
| 141 <data name="sig_clusters" format="tabular" from_work_dir="output/sig_clusters.txt" label="${tool.name} on ${on_string}: block associations"/> | |
| 142 <data name="all_associations" format="tabular" from_work_dir="output/all_associations.txt" label="${tool.name} on ${on_string}: all associations"/> | |
| 143 <data name="hallagram_out" format="pdf" from_work_dir="output/hallagram.pdf" label="${tool.name} on ${on_string}: hallagram"> | |
| 144 <filter>hallagram is True</filter> | |
| 145 </data> | |
| 146 <collection name="diagnostic_plot_out" format="pdf" type="list" label="${tool.name} on ${on_string}: diagnostic plots"> | |
| 147 <discover_datasets pattern="(?P<designation>association_.*)\.pdf" format="pdf" directory="output/diagnostic/" /> | |
| 148 <filter>diagnostic_plot is True</filter> | |
| 149 </collection> | |
| 150 </outputs> | |
| 151 <tests> | |
| 152 <test expect_num_outputs="3"> | |
| 153 <param name="x" value="X_16_100.txt"/> | |
| 154 <param name="y" value="Y_16_100.txt"/> | |
| 155 <param name="seed" value="42"/> | |
| 156 <output name="sig_clusters" value="sig_clusters.tsv"/> | |
| 157 <output name="all_associations" value="all_associations.tsv"/> | |
| 158 </test> | |
| 159 <test expect_num_outputs="3"> | |
| 160 <param name="x" value="X_16_100.txt"/> | |
| 161 <param name="y" value="Y_16_100.txt"/> | |
| 162 <param name="seed" value="42"/> | |
| 163 <param name="hallagram" value="false"/> | |
| 164 <param name="diagnostic_plot" value="true"/> | |
| 165 <output name="sig_clusters" value="sig_clusters.tsv"/> | |
| 166 <output name="all_associations" value="all_associations.tsv"/> | |
| 167 <output_collection name="diagnostic_plot_out" type="list" count="18"> | |
| 168 <element name="association_1" value="association_1.pdf"/> | |
| 169 </output_collection> | |
| 170 </test> | |
| 171 </tests> | |
| 172 <help><![CDATA[ | |
| 173 | |
| 174 .. class:: infomark | |
| 175 | |
| 176 **What it does** | |
| 177 | |
| 178 HAllA (Hierarchical All-against-All association) is a method for finding blocks of associated features in high-dimensional datasets | |
| 179 measured from a common set of samples. HAllA operates by | |
| 180 | |
| 181 1. optionally discretizing mixed continuous and categorical features to a uniform representation, | |
| 182 2. hierarchically clustering each dataset separately to generate a pair of data hierarchies, | |
| 183 3. performing all-against-all association testing between features across two datasets using robust measures of correlation, | |
| 184 4. determining the statistical significance of individual associations by permutation testing, and | |
| 185 5. iteratively subdividing the space of significant all-against-all correlations into blocks of densely associated features occurring as clusters in the original datasets. | |
| 186 | |
| 187 Tutorial https://github.com/biobakery/biobakery/wiki/halla | |
| 188 | |
| 189 Usage | |
| 190 ..... | |
| 191 | |
| 192 **Input** | |
| 193 | |
| 194 Data in scientific studies often come paired in the form of two high-dimensional datasets, where the dataset | |
| 195 X (with p features/rows and n samples/columns) are assumed to be p predictor variables (or features) measured | |
| 196 on n samples that give rise to d response variables contained in the dataset Y (with d features/rows and n samples/columns). | |
| 197 Note that column i of X is sampled jointly with column i of Y, so that X and Y are aligned. | |
| 198 | |
| 199 **Output** | |
| 200 | |
| 201 HAllA reports significant associations between clusters of related features ("block associations"). | |
| 202 Each block association is characterized by a cluster from the first dataset, a cluster from the second dataset, | |
| 203 and measures of statistical significance and effect size (p-value, q-value, and similarity score) for the cluster's | |
| 204 component pairwise associations. | |
| 205 | |
| 206 - **block associations** which reports block associations between the two datasets' features | |
| 207 - **all associations** which reports the pairwise similarity scores for all features across the two datasets | |
| 208 - **hallagram** (optional) graphical representation of the discovered block associations | |
| 209 - **diagnostic plots** (optional) lattice plot showing the pairwise associations between microbiome features and metadata for each significant cluster. | |
| 210 | |
| 211 **Notes** | |
| 212 | |
| 213 Details on the available options: | |
| 214 | |
| 215 - Hierarchical clustering linkage methods https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html | |
| 216 - FDR methods https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html | |
| 217 | |
| 218 ]]></help> | |
| 219 <citations> | |
| 220 <citation type="doi">10.1093/bioinformatics/btac232</citation> | |
| 221 </citations> | |
| 222 </tool> |
