episcanpy_cluster_embed: cluster

comparison cluster_embed.xml @ 0:43711b22f28b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/episcanpy/ commit ce8ee43d7285503a24c7b0f55c09c513be8c66f5

author	iuc
date	Tue, 18 Apr 2023 13:18:25 +0000
parents
children	51dbb534fbce

comparison

equal deleted inserted replaced

--1:000000000000
+:43711b22f28b
+<tool id="episcanpy_cluster_embed" name="Cluster, embed and annotate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+<description>with EpiScanpy</description>
+<macros>
+<import>macros.xml</import>
+</macros>
+<expand macro="bio_tools"/>
+<expand macro="requirements"/>
+<expand macro="version_command"/>
+<!-- The command line is supplied entirely by the @CMD@ macro token, which is not defined in this file (presumably in macros.xml; verify there). Job failure is detected from the process exit code. -->
+<command detect_errors="exit_code"><![CDATA[
+@CMD@
+]]></command>
+<configfiles>
+<!-- script_file: Cheetah template that renders the Python script for this job.
+     After the macro-provided preamble (@CMD_imports@ and @CMD_read_inputs@, defined outside this file; 'adata' is presumably created there), exactly one episcanpy call is emitted, selected by the 'method' conditional, and the result is written to 'anndata.h5ad'.
+     Select values such as True/False are substituted unquoted so they become Python literals; text values are wrapped in single quotes.
+     NOTE(review): get_n_clusters_key_added defaults to the text None and is emitted quoted (key_added='None'), i.e. a string rather than Python None. Confirm this is intended.
+     NOTE(review): the rank_features method option with an empty value is emitted as method=''. Confirm episcanpy treats that as automatic selection. -->
+<configfile name="script_file"><![CDATA[
+@CMD_imports@
+@CMD_read_inputs@
+import episcanpy as esc
+#if $method.method == 'pp.lazy'
+esc.pp.lazy(
+adata,
+pp_pca=$method.lazy_pp_pca,
+svd_solver='$method.lazy_svd_solver',
+nb_pcs=$method.lazy_nb_pcs,
+n_neighbors=$method.lazy_n_neighbors,
+perplexity=$method.lazy_perplexity,
+method='$method.lazy_method',
+metric='$method.lazy_metric',
+min_dist=$method.lazy_min_dist,
+spread=$method.lazy_spread,
+use_highly_variable=$method.lazy_use_highly_variable,
+n_components=$method.lazy_n_components,
+)
+#else if $method.method == 'tl.rank_features'
+esc.tl.rank_features(
+adata,
+omic='ATAC',
+groupby='$method.rank_features_groupby',
+use_raw=$method.rank_features_use_raw,
+groups='$method.rank_features_groups',
+reference='$method.rank_features_reference',
+n_features=$method.rank_features_n_features,
+rankby_abs=$method.rank_features_rankby_abs,
+key_added='$method.rank_features_key_added',
+copy=False,
+method='$method.rank_features_method',
+corr_method='$method.rank_features_corr_method'
+)
+#else if $method.method == 'tl.find_genes'
+esc.tl.find_genes(
+adata,
+gtf_file='$method.find_genes_gtf_file',
+key_added='$method.find_genes_key_added',
+upstream=$method.find_genes_upstream,
+feature_type='$method.find_genes_feature_type',
+annotation='$method.find_genes_annotation',
+raw=$method.find_genes_raw)
+#else if $method.method == 'tl.get_n_clusters'
+esc.tl.getNClusters(
+adata,
+n_cluster=$method.get_n_clusters_n_cluster,
+range_min=$method.get_n_clusters_range_min,
+range_max=$method.get_n_clusters_range_max,
+max_steps=$method.get_n_clusters_max_steps,
+method='$method.get_n_clusters_method',
+key_added='$method.get_n_clusters_key_added'
+)
+#else if $method.method == 'tl.kmeans'
+esc.tl.kmeans(
+adata,
+num_clusters=$method.kmeans_num_clusters
+)
+#else if $method.method == 'tl.hc'
+esc.tl.hc(
+adata,
+num_clusters=$method.hc_num_clusters
+)
+#end if
+adata.write('anndata.h5ad')
+]]></configfile>
+</configfiles>
+<inputs>
+<expand macro="inputs_anndata"/>
+<conditional name="method">
+<!-- Selector for the episcanpy function to run. Each option value must match a <when> block below and a branch of the script_file template. The function named in each label is the one the template actually calls. -->
+<param argument="method" type="select" label="Method used for Clustering or Embedding">
+<option value="pp.lazy">Embedding: Automatically compute PCA coordinates, loadings and variance decomposition, a neighborhood graph of observations, t-distributed stochastic neighborhood embedding (tSNE) and Uniform Manifold Approximation and Projection (UMAP), using 'pp.lazy'</option>
+<option value="tl.rank_features">Rank features for characterizing groups, using 'tl.rank_features'</option>
+<option value="tl.find_genes">Embedding: Find genes and add annotations, using 'tl.find_genes'</option>
+<option value="tl.get_n_clusters">Clustering: Test different settings of louvain to obtain the target number of clusters, using 'tl.getNClusters'</option>
+<option value="tl.kmeans">Clustering: Compute kmeans clustering using X_pca fits, using 'tl.kmeans'</option>
+<option value="tl.hc">Clustering: Compute hierarchical clustering using X_pca fits, using 'tl.hc'</option>
+</param>
+<when value="pp.lazy">
+<!-- Inputs for esc.pp.lazy. Each param is passed to the keyword argument named in its help text. True/False selects are substituted unquoted into the script, so their option values must stay valid Python literals. -->
+<!-- The default of a select comes from the option marked selected, so no value attribute is set on the param itself. -->
+<param name="lazy_pp_pca" type="select" label="Compute PCA coordinates before the neighborhood graph" help="(pp_pca)">
+<option value="True" selected="true">True</option>
+<option value="False">False</option>
+</param>
+<param name="lazy_svd_solver" type="select" label="SVD solver to use" help="(svd_solver)">
+<option value="arpack" selected="true">arpack (for the ARPACK wrapper in SciPy)</option>
+<option value="randomized">randomized (for the randomized algorithm due to Halko (2009))</option>
+<option value="auto">auto (chooses automatically depending on the size of the problem)</option>
+<option value="lobpcg">lobpcg (an alternative SciPy solver)</option>
+</param>
+<param name="lazy_nb_pcs" value="50" min="0" type="integer" label="Number of principal components computed for PCA (and therefore neighbors, tsne and umap)" help="(nb_pcs)"/>
+<param name="lazy_n_neighbors" value="15" min="0" type="integer" label="Size of the local neighborhood (number of neighboring data points) used for manifold approximation" help="(n_neighbors)"/>
+<param name="lazy_perplexity" value="30" min="0" type="integer" label="Perplexity (number of nearest neighbors used in other manifold learning algorithms)" help="(perplexity)"/>
+<param name="lazy_method" type="select" label="Kernel to use for computing connectivities" help="(method)">
+<option value="umap" selected="true">umap</option>
+<option value="gauss">gauss</option>
+</param>
+<param name="lazy_metric" type="select" label="Metric that returns a distance" help="(metric)">
+<option value="euclidean" selected="true">euclidean</option>
+<option value="cityblock">cityblock</option>
+<option value="cosine">cosine</option>
+<option value="l1">l1</option>
+<option value="l2">l2</option>
+<option value="manhattan">manhattan</option>
+<option value="braycurtis">braycurtis</option>
+<option value="canberra">canberra</option>
+<option value="chebyshev">chebyshev</option>
+<option value="correlation">correlation</option>
+<option value="dice">dice</option>
+<option value="hamming">hamming</option>
+<option value="jaccard">jaccard</option>
+<option value="kulsinski">kulsinski</option>
+<option value="mahalanobis">mahalanobis</option>
+<option value="minkowski">minkowski</option>
+<option value="rogerstanimoto">rogerstanimoto</option>
+<option value="russelrao">russelrao</option>
+<option value="seuclidean">seuclidean</option>
+<option value="sokalmichener">sokalmichener</option>
+<option value="sokalsneath">sokalsneath</option>
+<option value="sqeuclidean">sqeuclidean</option>
+<option value="yule">yule</option>
+</param>
+<param name="lazy_min_dist" value="0.5" min="0" type="float" label="The effective minimum distance between embedded points" help="(min_dist)"/>
+<param name="lazy_spread" value="1.0" type="float" label="The effective scale of embedded points" help="(spread)"/>
+<param name="lazy_use_highly_variable" type="select" label="Use highly variable genes only" help="(use_highly_variable)">
+<option value="True">True</option>
+<option value="False" selected="true">False</option>
+</param>
+<param name="lazy_n_components" value="2" min="0" type="integer" label="The number of dimensions of the UMAP embedding" help="(n_components)"/>
+</when>
+<when value="tl.rank_features">
+<!-- Inputs for esc.tl.rank_features. Text params are restricted by a sanitizer and regex validator because the script template places them inside single-quoted Python strings. -->
+<param name="rank_features_groupby" value="louvain" type="text" label="The key of the observations grouping to consider" help="(groupby)">
+<sanitizer invalid_char="">
+<valid initial="string.letters,string.digits">
+<add value="_" />
+<add value="-" />
+<add value=" " />
+<add value="." />
+</valid>
+</sanitizer>
+<validator type="regex">[0-9a-zA-Z_. -]+</validator>
+</param>
+<param name="rank_features_use_raw" type="select" label="Use raw attribute of Anndata if present" help="(use_raw)">
+<option value="True" selected="true">True</option>
+<option value="False">False</option>
+</param>
+<param name="rank_features_groups" value="all" type="text" label="Subset of groups, to which comparison shall be restricted" help="(groups)">
+<sanitizer invalid_char="">
+<valid initial="string.letters,string.digits">
+<add value="_" />
+<add value="-" />
+<add value="." />
+<add value=" " />
+<add value="," />
+</valid>
+</sanitizer>
+<validator type="regex">[0-9a-zA-Z_., -]+</validator>
+</param>
+<param name="rank_features_reference" value="rest" type="text" label="Compare each group with respect to this group" help="(reference)">
+<sanitizer invalid_char="">
+<valid initial="string.letters,string.digits">
+<add value="_" />
+<add value="-" />
+<add value="." />
+<add value=" " />
+<add value="," />
+</valid>
+</sanitizer>
+<validator type="regex">[0-9a-zA-Z_., -]+</validator>
+</param>
+<param name="rank_features_n_features" value="100" min="1" type="integer" label="The number of features that appear in the returned tables" help="(n_features)"/>
+<param name="rank_features_rankby_abs" type="select" label="Rank genes by the absolute value of the score, not by the score" help="(rankby_abs)">
+<option value="True">True</option>
+<option value="False" selected="true">False</option>
+</param>
+<param name="rank_features_key_added" value="rank_features_groups" type="text" label="The key in adata.uns information is saved to" help="(key_added)">
+<sanitizer invalid_char="">
+<valid initial="string.letters,string.digits">
+<add value="_" />
+<add value="-" />
+<add value="." />
+<add value=" " />
+<add value="," />
+</valid>
+</sanitizer>
+<validator type="regex">[0-9a-zA-Z_., -]+</validator>
+</param>
+<!-- NOTE(review): the empty option value is rendered as method='' in the script; confirm episcanpy treats that as automatic selection. -->
+<param name="rank_features_method" type="select" label="Method to use" help="(method)">
+<option value="" selected="true">Auto select for ATAC</option>
+<option value="logreg">Logistic regression</option>
+<option value="t-test">t-test</option>
+<option value="t-test_overestim_var">t-test_overestim_var</option>
+<option value="wilcoxon">Wilcoxon rank sum</option>
+</param>
+<!-- Default is stated on the option via selected rather than through a value attribute on the select param. -->
+<param name="rank_features_corr_method" type="select" label="p-value correction method" help="(corr_method)">
+<option value="benjamini-hochberg" selected="true">Benjamini Hochberg</option>
+<option value="bonferroni">Bonferroni</option>
+</param>
+</when>
+<when value="tl.find_genes">
+<!-- Inputs for esc.tl.find_genes: a GTF dataset plus the keyword arguments named in each help text. Text params are sanitized because the script template places them inside single-quoted Python strings; find_genes_raw is substituted unquoted as a Python literal. -->
+<param name="find_genes_gtf_file" type="data" format="gtf" label="Annotation GTF file" help="(gtf_file)"/>
+<param name="find_genes_key_added" value="transcript_annotation" type="text" label="Key added" help="(key_added)">
+<sanitizer invalid_char="">
+<valid initial="string.letters,string.digits">
+<add value="_" />
+<add value="-" />
+<add value="." />
+<add value=" " />
+<add value="," />
+</valid>
+</sanitizer>
+<validator type="regex">[0-9a-zA-Z_., -]+</validator>
+</param>
+<param name="find_genes_upstream" value="2000" min="0" type="integer" label="Upstream" help="(upstream)"/>
+<param name="find_genes_feature_type" value="transcript" type="text" label="Feature type" help="(feature_type)">
+<sanitizer invalid_char="">
+<valid initial="string.letters,string.digits">
+<add value="_" />
+<add value="-" />
+<add value="." />
+<add value=" " />
+<add value="," />
+</valid>
+</sanitizer>
+<validator type="regex">[0-9a-zA-Z_., -]+</validator>
+</param>
+<param name="find_genes_annotation" value="HAVANA" type="text" label="Annotation" help="(annotation)">
+<sanitizer invalid_char="">
+<valid initial="string.letters,string.digits">
+<add value="_" />
+<add value="-" />
+<add value="." />
+<add value=" " />
+<add value="," />
+</valid>
+</sanitizer>
+<validator type="regex">[0-9a-zA-Z_., -]+</validator>
+</param>
+<param name="find_genes_raw" type="select" label="Raw?" help="(raw)">
+<option value="True">True</option>
+<option value="False" selected="true">False</option>
+</param>
+</when>
+<when value="tl.get_n_clusters">
+<!-- Inputs for esc.tl.getNClusters: target cluster count, clustering method (leiden by default), the search range, and the step limit. -->
+<!-- NOTE(review): get_n_clusters_key_added defaults to the text None, which the script template renders quoted as key_added='None' (a string, not Python None). Confirm this is the intended default. -->
+<param name="get_n_clusters_n_cluster" value="14" min="1" type="integer" label="Number of clusters" help="(n_cluster)"/>
+<param name="get_n_clusters_method" type="select" label="Clustering method to use" help="(method)">
+<option value="leiden" selected="true">leiden</option>
+<option value="louvain">louvain</option>
+</param>
+<param name="get_n_clusters_range_min" value="0" min="0" type="integer" label="Range minimum" help="(range_min)"/>
+<param name="get_n_clusters_range_max" value="3" min="1" type="integer" label="Range maximum" help="(range_max)"/>
+<param name="get_n_clusters_max_steps" value="20" min="1" type="integer" label="Maximum number of steps" help="(max_steps)"/>
+<param name="get_n_clusters_key_added" value="None" type="text" label="Variable name in obs" help="(key_added)">
+<sanitizer invalid_char="">
+<valid initial="string.letters,string.digits">
+<add value="_" />
+<add value="-" />
+<add value="." />
+<add value=" " />
+<add value="," />
+</valid>
+</sanitizer>
+<validator type="regex">[0-9a-zA-Z_., -]+</validator>
+</param>
+</when>
+<when value="tl.kmeans">
+<!-- Single input for esc.tl.kmeans: the number of clusters, passed as num_clusters. -->
+<param name="kmeans_num_clusters" value="14" min="1" type="integer" label="Number of clusters" help="(num_clusters)"/>
+</when>
+<when value="tl.hc">
+<!-- Single input for esc.tl.hc: the number of clusters, passed as num_clusters. -->
+<param name="hc_num_clusters" value="14" min="1" type="integer" label="Number of clusters" help="(num_clusters)"/>
+</when>
+</conditional>
+<expand macro="inputs_common_advanced"/>
+</inputs>
+<outputs>
+<!-- Outputs are declared by the anndata_outputs macro, which is defined outside this file. The tests below refer to outputs named anndata_out and hidden_output, presumably declared there (verify in macros.xml). -->
+<expand macro="anndata_outputs"/>
+</outputs>
+<tests>
+<test expect_num_outputs="2">
+<!-- test 0: pp.lazy on the krumsiek11 dataset with 5 principal components; checks the rendered script in the log output and the size of the resulting AnnData file. -->
+<param name="adata" value="krumsiek11.h5ad" />
+<conditional name="method">
+<param name="method" value="pp.lazy"/>
+<param name="lazy_pp_pca" value="True"/>
+<param name="lazy_svd_solver" value="arpack"/>
+<param name="lazy_nb_pcs" value="5"/>
+<!-- Names below must match the declared inputs lazy_n_neighbors and lazy_perplexity; the values equal the tool defaults. -->
+<param name="lazy_n_neighbors" value="15"/>
+<param name="lazy_perplexity" value="30"/>
+<param name="lazy_method" value="umap"/>
+<param name="lazy_metric" value="euclidean"/>
+<param name="lazy_min_dist" value="0.5"/>
+<param name="lazy_spread" value="1.0"/>
+<param name="lazy_use_highly_variable" value="False"/>
+<param name="lazy_n_components" value="2"/>
+</conditional>
+<section name="advanced_common">
+<param name="show_log" value="true" />
+</section>
+<output name="hidden_output">
+<assert_contents>
+<has_text_matching expression="esc.pp.lazy"/>
+<has_text_matching expression="adata"/>
+<has_text_matching expression="nb_pcs=5"/>
+</assert_contents>
+</output>
+<output name="anndata_out" file="krumsiek11.pp.lazy.h5ad" ftype="h5ad" compare="sim_size"/>
+</test>
+<test expect_num_outputs="2">
+<!-- test 1: tl.rank_features grouped by the louvain key, with use_raw disabled and the automatic method option; asserts that the expected top-level HDF5 groups exist in the output. -->
+<param name="adata" value="krumsiek11.pp.lazy.tl.louvain.h5ad" />
+<conditional name="method">
+<param name="method" value="tl.rank_features"/>
+<param name="rank_features_groupby" value="louvain"/>
+<param name="rank_features_use_raw" value="False"/>
+<param name="rank_features_groups" value="all"/>
+<param name="rank_features_reference" value="rest"/>
+<param name="rank_features_n_features" value="100"/>
+<param name="rank_features_rankby_abs" value="False"/>
+<param name="rank_features_key_added" value="rank_features_groups"/>
+<param name="rank_features_method" value=""/>
+<param name="rank_features_corr_method" value="benjamini-hochberg"/>
+</conditional>
+<section name="advanced_common">
+<param name="show_log" value="true" />
+</section>
+<output name="anndata_out" file="krumsiek11.tl.rank_features.h5ad" ftype="h5ad">
+<assert_contents>
+<has_h5_keys keys="obs, uns, obsm, varm, obsp" />
+</assert_contents>
+</output>
+</test>
+<test expect_num_outputs="2">
+<!-- test 2: tl.find_genes annotating chrY.h5ad from chrY.gtf with the default settings; output is compared by size and must contain the var group. -->
+<param name="adata" value="chrY.h5ad" />
+<conditional name="method">
+<param name="method" value="tl.find_genes"/>
+<param name="find_genes_gtf_file" value="chrY.gtf"/>
+<param name="find_genes_key_added" value="transcript_annotation"/>
+<param name="find_genes_upstream" value="2000"/>
+<param name="find_genes_feature_type" value="transcript"/>
+<param name="find_genes_annotation" value="HAVANA"/>
+<param name="find_genes_raw" value="False"/>
+</conditional>
+<section name="advanced_common">
+<param name="show_log" value="true" />
+</section>
+<output name="anndata_out" file="chrY_with_transcript_annotation.h5ad" ftype="h5ad" compare="sim_size">
+<assert_contents>
+<has_h5_keys keys="var" />
+</assert_contents>
+</output>
+</test>
+<test expect_num_outputs="2">
+<!-- test 3: tl.get_n_clusters using louvain over the resolution range 0 to 3 with at most 20 steps. -->
+<param name="adata" value="krumsiek11.pp.lazy.tl.louvain.h5ad" />
+<conditional name="method">
+<param name="method" value="tl.get_n_clusters"/>
+<!-- Name corrected to the declared input get_n_clusters_n_cluster. NOTE(review): the earlier misspelled name (with value 3) matched no input, so the tool default of 14 was presumably in effect when the expected file was produced. The value is set to 14 to keep that behaviour; regenerate the expected output if a target of 3 is wanted. -->
+<param name="get_n_clusters_n_cluster" value="14"/>
+<param name="get_n_clusters_method" value="louvain"/>
+<param name="get_n_clusters_range_min" value="0"/>
+<param name="get_n_clusters_range_max" value="3"/>
+<param name="get_n_clusters_max_steps" value="20"/>
+<!-- get_n_clusters_key_added is a direct child of the method conditional in the inputs, so it is set here without any nested conditional. -->
+<param name="get_n_clusters_key_added" value="None"/>
+</conditional>
+<section name="advanced_common">
+<param name="show_log" value="true" />
+</section>
+<output name="anndata_out" file="krumsiek11.tl.get_n_clusters.h5ad" ftype="h5ad"/>
+</test>
+<test expect_num_outputs="2">
+<!-- test 4: tl.kmeans with 14 clusters on the pp.lazy output of krumsiek11. -->
+<param name="adata" value="krumsiek11.pp.lazy.h5ad" />
+<conditional name="method">
+<param name="method" value="tl.kmeans"/>
+<param name="kmeans_num_clusters" value="14"/>
+</conditional>
+<section name="advanced_common">
+<param name="show_log" value="true" />
+</section>
+<output name="anndata_out" file="krumsiek11.tl.kmeans.h5ad" ftype="h5ad"/>
+</test>
+<test expect_num_outputs="2">
+<!-- test 5: tl.hc (hierarchical clustering) with 14 clusters on the pp.lazy output of krumsiek11. -->
+<param name="adata" value="krumsiek11.pp.lazy.h5ad" />
+<conditional name="method">
+<param name="method" value="tl.hc"/>
+<param name="hc_num_clusters" value="14"/>
+</conditional>
+<section name="advanced_common">
+<param name="show_log" value="true" />
+</section>
+<output name="anndata_out" file="krumsiek11.tl.hc.h5ad" ftype="h5ad"/>
+</test>
+</tests>
+<help><![CDATA[
+Automatically compute PCA coordinates (`pp.lazy`)
+========================================================================================
+This function automatically computes PCA coordinates, loadings and variance decomposition, a neighborhood graph of
+observations, t-distributed stochastic neighborhood embedding (tSNE) and Uniform Manifold Approximation and Projection (UMAP).
+More details on the `episcanpy documentation
+<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.lazy.html>`__
+Find and add gene annotations (`tl.find_genes`)
+========================================================================================
+This function adds gene annotations to an AnnData (h5ad) file from an annotation file (.annotation.gtf).
+Automatically obtain target number of clusters (`tl.getNClusters`)
+========================================================================================
+This function will test different settings of louvain to obtain the target number of clusters.
+More details on the `episcanpy documentation
+<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.tl.getNClusters.html>`__
+Perform kmeans clustering (`tl.kmeans`)
+========================================================================================
+This function will perform kmeans clustering using X_pca fits.
+More details on the `episcanpy documentation
+<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.tl.kmeans.html>`__
+Compute hierarchical clustering using X_pca fits (`tl.hc`)
+===================================================================
+This function computes hierarchical clustering using X_pca fits, with random_state=2019.
+More details on the `episcanpy documentation
+<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.tl.hc.html>`__
+]]></help>
+<expand macro="citations"/>
+</tool>

Mercurial > repos > iuc > episcanpy_cluster_embed

comparison cluster_embed.xml @ 0:43711b22f28b draft