halla: halla.xml comparison

comparison halla.xml @ 0:f6e288442812 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/halla commit 5cd01ad3808dff1ce4aae231706cbe2225079a04

author	iuc
date	Wed, 05 Nov 2025 09:37:09 +0000
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:f6e288442812
+<tool id="halla" name="HAllA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
+    <!-- One-line summary shown next to the tool name. -->
+    <description>Hierarchical All-against-All association</description>
+    <!-- Tokens substituted into the version and profile attributes of the tool element and into the requirement below. -->
+    <macros>
+        <token name="@TOOL_VERSION@">0.8.40</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+        <token name="@PROFILE@">24.0</token>
+    </macros>
+    <!-- NOTE(review): the bio.tools cross-reference carries no identifier; confirm whether a registry entry exists and fill it in. -->
+    <xrefs>
+        <xref type="bio.tools"></xref>
+    </xrefs>
+    <!-- The halla package is pinned to the same version as the wrapper. -->
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">halla</requirement>
+    </requirements>
+    <version_command><![CDATA[halla --version]]></version_command>
+<command detect_errors="exit_code"><![CDATA[
+## Run HAllA on the two input tables; all results are written below the directory named output.
+halla
+-x '$x'
+## Plot labels: use the text entered by the user, otherwise fall back to the dataset identifier.
+## NOTE(review): the identifier is placed inside single quotes without sanitising; confirm it can never contain a single quote.
+--x_dataset_label
+#if $x_dataset_label
+'$x_dataset_label'
+#else
+'$x.element_identifier'
+#end if
+-y '$y'
+--y_dataset_label
+#if $y_dataset_label
+'$y_dataset_label'
+#else
+'$y.element_identifier'
+#end if
+## Boolean parameters expand to their flag (or to nothing) through truevalue and falsevalue.
+$alla
+--max_freq_thresh $max_freq_thresh
+## The multi-select is only emitted when at least one transformation is chosen.
+#if $transform_data_funcs
+--transform_data_funcs
+#for $foo in $transform_data_funcs
+$foo
+#end for
+#end if
+$disable_bypass_discretization_if_possible
+#if $discretize_func
+--discretize_func $discretize_func
+#end if
+#if $discretize_num_bins
+--discretize_num_bins $discretize_num_bins
+#end if
+--pdist_metric $pdist_metric
+$sim2dist_disable_abs
+--linkage_method $linkage_method
+--permute_func $permute_func
+--permute_iters $permute_iters
+$disable_permute_speedup
+--fdr_alpha $fdr_alpha
+--fdr_method $fdr_method
+--fnr_thresh $fnr_thresh
+--rank_cluster $rank_cluster
+## Compare against the empty string instead of testing truthiness so that a seed of 0 is still forwarded.
+#if str($seed) != ''
+--seed $seed
+#end if
+--block_num $block_num
+$hallagram
+$diagnostic_plot
+-o output
+--num_threads "\${GALAXY_SLOTS:-4}"
+]]></command>
+    <!-- Tool form. Each argument attribute names the halla command line option the parameter feeds. -->
+    <inputs>
+        <!-- Paired input tables -->
+        <param argument="-x" type="data" format="tabular" label="Dataset X" help="Tabular dataset with p features/rows and n samples/columns" />
+        <param argument="-y" type="data" format="tabular" label="Dataset Y" help="Tabular dataset with d features/rows and n samples/columns" />
+        <param argument="--alla" type="boolean" truevalue="--alla" falsevalue="" checked="false" label="Use AllA instead of HAllA" help="HAllA uses a hierarchical approach for block association discovery on top of an existing all-against-all (AllA) association matrix. Use this option to skip the block association step." />
+        <!-- Feature filtering, transformation and discretization -->
+        <param argument="--max_freq_thresh" type="float" min="0" max="1" value="1" label="Maximum frequency" help="Features with max frequencies >= the threshold will be removed" />
+        <param argument="--transform_data_funcs" type="select" optional="true" multiple="true" label="Continuous data transformation function">
+            <option value="zscore">zscore</option>
+            <option value="rank">rank</option>
+            <option value="quantile">quantile</option>
+        </param>
+        <param argument="--disable_bypass_discretization_if_possible" type="boolean" truevalue="--disable_bypass_discretization_if_possible" falsevalue="" checked="false" label="Discretize even if all features are continuous" help="By default discretization is bypassed if all features are continuous" />
+        <param argument="--discretize_func" type="select" optional="true" label="Discretization function">
+            <option value="quantile">quantile</option>
+            <option value="kmeans">kmeans</option>
+            <option value="uniform">uniform</option>
+            <option value="jenks">jenks</option>
+        </param>
+        <param argument="--discretize_num_bins" type="integer" min="0" value="" optional="true" label="Discretization - number of bins"/>
+        <!-- Similarity measure and hierarchical clustering -->
+        <param argument="--pdist_metric" type="select" label="Distance/similarity metric" help="Default: spearman for continuous data. If there is at least one categorical variable in either dataset, HAllA will shift to Normalized Mutual Information (NMI) as an alternative similarity measure.">
+            <option value="spearman" selected="true">Spearman</option>
+            <option value="pearson">Pearson</option>
+            <option value="dcor">Distance correlation</option>
+            <option value="mi">mutual information</option>
+            <option value="nmi">normalized mutual information</option>
+            <option value="xicor">xi correlation</option>
+        </param>
+        <param argument="--sim2dist_disable_abs" type="boolean" truevalue="--sim2dist_disable_abs" falsevalue="" checked="false" label="Hierarchical clustering - disable setting similarity scores as absolute when computing distance" />
+        <param argument="--linkage_method" type="select" label="Hierarchical clustering linkage method" help="see help below">
+            <option value="single">single</option>
+            <option value="complete">complete</option>
+            <option value="average" selected="true">average</option>
+            <option value="weighted">weighted</option>
+            <option value="centroid">centroid</option>
+            <option value="median">median</option>
+            <option value="ward">ward</option>
+        </param>
+        <!-- Permutation testing -->
+        <param argument="--permute_func" type="select" label="P-value approximation function" help="Used in the p-value permutation test">
+            <option value="gpd">gpd</option>
+            <option value="ecdf">ecdf</option>
+        </param>
+        <param argument="--permute_iters" type="integer" min="0" value="1000" label="Number of iterations in the p-value permutation test"/>
+        <param argument="--disable_permute_speedup" type="boolean" truevalue="--disable_permute_speedup" falsevalue="" checked="false" label="Do not break early in the permutation test if p-value is insignificant"/>
+        <!-- \-\-force_permutations  If turned on, force permutation testing -->
+        <!-- Multiple testing correction; option descriptions follow the statsmodels multipletests method list. -->
+        <param argument="--fdr_alpha" type="float" min="0" max="1" value="0.05" label="FDR threshold"/>
+        <param argument="--fdr_method" type="select" label="FDR method" help="see help below">
+            <option value="bonferroni">bonferroni: one-step correction</option>
+            <option value="sidak">sidak: one-step correction</option>
+            <option value="holm-sidak">holm-sidak: step down method using Sidak adjustments</option>
+            <option value="holm">holm: step-down method using Bonferroni adjustments</option>
+            <option value="simes-hochberg">simes-hochberg: step-up method (independent)</option>
+            <option value="hommel">hommel: closed method based on Simes tests (non-negative)</option>
+            <option value="fdr_bh" selected="true">fdr_bh: Benjamini/Hochberg (non-negative)</option>
+            <option value="fdr_by">fdr_by: Benjamini/Yekutieli (negative)</option>
+            <option value="fdr_tsbh">fdr_tsbh: two stage fdr correction (non-negative)</option>
+            <option value="fdr_tsbky">fdr_tsbky: two stage fdr correction (non-negative)</option>
+        </param>
+        <param argument="--fnr_thresh" type="float" min="0" max="1" value="0.05" label="FNR threshold"/>
+        <param argument="--rank_cluster" type="select" label="Procedure to rank cluster using the p-values within the cluster">
+            <option value="best" selected="true">best</option>
+            <option value="average">average</option>
+        </param>
+        <!-- Left empty, no seed option is passed to halla. -->
+        <param argument="--seed" type="integer" value="" optional="true" label="Randomization seed" />
+        <!-- Plot output options -->
+        <param argument="--hallagram" type="boolean" truevalue="--hallagram" falsevalue="--no_hallagram" checked="true" label="Generate hallagram" />
+        <param argument="--x_dataset_label" type="text" label="Hallagram/clustermap: label for X dataset" help="By default the dataset identifier is used"/>
+        <param argument="--y_dataset_label" type="text" label="Hallagram/clustermap: label for Y dataset" help="By default the dataset identifier is used"/>
+        <param argument="--block_num" type="integer" min="-1" value="-1" label="Number of top clusters in hallagram" help="-1: show all clusters"/>
+        <param argument="--diagnostic_plot" type="boolean" truevalue="--diagnostic_plot" falsevalue="" checked="false" label="Generates diagnostic plot" />
+    </inputs>
+    <!-- Results are collected from the output directory that the command passes to halla with -o. -->
+    <outputs>
+        <!-- The two tabular reports are always produced. -->
+        <data name="sig_clusters" format="tabular" from_work_dir="output/sig_clusters.txt" label="${tool.name} on ${on_string}: block associations"/>
+        <data name="all_associations" format="tabular" from_work_dir="output/all_associations.txt" label="${tool.name} on ${on_string}: all associations"/>
+        <!-- Only present while the hallagram checkbox is ticked. -->
+        <data name="hallagram_out" format="pdf" from_work_dir="output/hallagram.pdf" label="${tool.name} on ${on_string}: hallagram">
+            <filter>hallagram is True</filter>
+        </data>
+        <!-- Only present when diagnostic plots are requested; one list element per association_*.pdf file found. -->
+        <collection name="diagnostic_plot_out" format="pdf" type="list" label="${tool.name} on ${on_string}: diagnostic plots">
+            <discover_datasets pattern="(?P&lt;designation&gt;association_.*)\.pdf" format="pdf" directory="output/diagnostic/" />
+            <filter>diagnostic_plot is True</filter>
+        </collection>
+    </outputs>
+    <tests>
+        <!-- Default settings with a fixed seed: both tables plus the hallagram, three outputs in total. -->
+        <test expect_num_outputs="3">
+            <param name="x" value="X_16_100.txt"/>
+            <param name="y" value="Y_16_100.txt"/>
+            <param name="seed" value="42"/>
+            <output name="sig_clusters" value="sig_clusters.tsv"/>
+            <output name="all_associations" value="all_associations.tsv"/>
+        </test>
+        <!-- Hallagram switched off and diagnostic plots switched on: both tables plus the plot collection. -->
+        <test expect_num_outputs="3">
+            <param name="x" value="X_16_100.txt"/>
+            <param name="y" value="Y_16_100.txt"/>
+            <param name="seed" value="42"/>
+            <param name="hallagram" value="false"/>
+            <param name="diagnostic_plot" value="true"/>
+            <output name="sig_clusters" value="sig_clusters.tsv"/>
+            <output name="all_associations" value="all_associations.tsv"/>
+            <output_collection name="diagnostic_plot_out" type="list" count="18">
+                <element name="association_1" value="association_1.pdf"/>
+            </output_collection>
+        </test>
+    </tests>
+<help><![CDATA[
+.. class:: infomark
+**What it does**
+HAllA (Hierarchical All-against-All association) is a method for finding blocks of associated features in high-dimensional datasets
+measured from a common set of samples. HAllA operates by:
+1. optionally discretizing mixed continuous and categorical features to a uniform representation,
+2. hierarchically clustering each dataset separately to generate a pair of data hierarchies,
+3. performing all-against-all association testing between features across two datasets using robust measures of correlation,
+4. determining the statistical significance of individual associations by permutation testing, and
+5. iteratively subdividing the space of significant all-against-all correlations into blocks of densely associated features occurring as clusters in the original datasets.
+Tutorial https://github.com/biobakery/biobakery/wiki/halla
+Usage
+.....
+**Input**
+Data in scientific studies often come paired in the form of two high-dimensional datasets, where the dataset
+X (with p features/rows and n samples/columns) is assumed to contain p predictor variables (or features) measured
+on n samples that give rise to d response variables contained in the dataset Y (with d features/rows and n samples/columns).
+Note that column i of X is sampled jointly with column i of Y, so that X and Y are aligned.
+**Output**
+HAllA reports significant associations between clusters of related features ("block associations").
+Each block association is characterized by a cluster from the first dataset, a cluster from the second dataset,
+and measures of statistical significance and effect size (p-value, q-value, and similarity score) for the cluster's
+component pairwise associations.
+- **block associations** which reports block associations between the two datasets' features
+- **all associations** which reports the pairwise similarity scores for all features across the two datasets
+- **hallagram** graphical representation of the discovered block associations
+- **diagnostic plots** (optional) lattice plot showing the pairwise associations between microbiome features and metadata for each significant cluster.
+**Notes**
+Details on the available options:
+- Hierarchical clustering linkage methods https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html
+- FDR methods https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html
+]]></help>
+    <!-- Publication to cite, referenced by DOI. -->
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btac232</citation>
+    </citations>
+</tool>

Mercurial > repos > iuc > halla

comparison halla.xml @ 0:f6e288442812 draft default tip