scanpy_normalize: normalize.xml comparison

comparison normalize.xml @ 12:0ac2f7d40040 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit c21958f44b81d740191999fb6015d5ae69538ee0

author	iuc
date	Wed, 31 Jul 2024 18:08:37 +0000
parents	51f9a8b21134
children	381401225cbc

comparison

equal deleted inserted replaced

-:51f9a8b21134
+:0ac2f7d40040
-<tool id="scanpy_normalize" name="Normalize" version="@galaxy_version@" profile="@profile@">
+<tool id="scanpy_normalize" name="Normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">
-<description>with scanpy</description>
+<description>and impute with scanpy</description>
-<expand macro="bio_tools"/>
 <macros>
 <import>macros.xml</import>
 </macros>
+<expand macro="bio_tools"/>
 <expand macro="requirements"/>
 <expand macro="version_command"/>
 <command detect_errors="exit_code"><![CDATA[
 @CMD@
 ]]></command>
 @CMD_read_inputs@
 #if $method.method == "pp.normalize_total"
 sc.pp.normalize_total(
 adata,
-#if str($method.target_sum)!= ''
+#if str($method.target_sum) != ''
 target_sum=$method.target_sum,
 #end if
 exclude_highly_expressed=$method.exclude_highly_expressed.exclude_highly_expressed,
 #if $method.exclude_highly_expressed.exclude_highly_expressed == "True"
 max_fraction=$method.exclude_highly_expressed.max_fraction,
 #end if
-#if str($method.key_added) != ''
+#if $method.key_added
 key_added='$method.key_added',
 #end if
-#if str($method.layers) != ''
+#if $method.layers
 ## Pass an explicit list of layer names unless the user selected 'all'.
 #if str($method.layers) != 'all'
 ## The value is substituted as text, so it is quoted before being split on
 ## commas; surrounding whitespace is stripped from each layer name.
 layers=[x.strip() for x in '$method.layers'.split(',')],
 #else
 layers='$method.layers',
 #end if
 adata=adata,
 log=$method.log,
 plot=False,
 copy=False)
+#else if $method.method == "external.pp.magic"
+sc.external.pp.magic(
+adata=adata,
+name_list='$method.name_list',
+knn=$method.knn,
+#if str($method.decay) != ''
+decay=$method.decay,
+#end if
+#if str($method.knn_max) != ''
+knn_max=$method.knn_max,
+#end if
+#if $method.t == -1
+t='auto',
+#else
+t=$method.t,
+#end if
+#if str($method.n_pca) != ''
+n_pca=$method.n_pca,
+#end if
+solver='$method.solver',
+knn_dist='$method.knn_dist',
+random_state=$method.random_state,
+copy=False)
 #end if
 @CMD_anndata_write_outputs@
 ]]></configfile>
 <!-- Method selector: the chosen value is compared against $method.method in the configfile and against the <when value="..."> branches of this conditional -->
 <param argument="method" type="select" label="Method used for normalization">
 <option value="pp.normalize_total">Normalize counts per cell, using 'pp.normalize_total'</option>
 <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using 'pp.recipe_zheng17'</option>
 <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using 'pp.recipe_weinreb17'</option>
 <option value="pp.recipe_seurat">Normalization and filtering as of Seurat et al (2015), using 'pp.recipe_seurat'</option>
+<option value="external.pp.magic">Denoising using Markov Affinity-based Graph Imputation of Cells (MAGIC) API 'external.pp.magic'</option>
 </param>
 <when value="pp.normalize_total">
 <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/>
 <conditional name="exclude_highly_expressed">
 <param argument="exclude_highly_expressed" type="select" label="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell" help=" A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to target_sum">
 <expand macro="pca_random_state"/>
 </when>
 <when value="pp.recipe_seurat">
 <expand macro="param_log"/>
 </when>
+<when value="external.pp.magic">
+<param name="name_list" type="select" label="Denoised genes to return" help="Selecting all genes may require a large amount of memory">
+<option value="all_genes">All genes</option>
+<option value="pca_only">PCA only</option>
+</param>
+<param argument="knn" type="integer" min="1" value="5" label="Number of nearest neighbors on which to build kernel" help=""/>
+<param argument="decay" type="integer" optional="true" value="1" label="Set decay rate of kernel tails"
+help="If not set, alpha decaying kernel is not used" />
+<param argument="knn_max" type="integer" min="1" optional="true" value="" label="Maximum number of nearest neighbors with nonzero connection"
+help="If not set, will be set to 3 * knn" />
+<param argument="t" type="integer" min="-1" value="3" label="Power to which the diffusion operator is powered. This sets the level of diffusion"
+help="If ‘-1’, this parameter is selected according to the Procrustes disparity of the diffused data." />
+<param argument="n_pca" type="integer" value="100" optional="true" label="Number of principal components to use for calculating neighborhoods"
+help="For extremely large datasets, using n_pca less than 20 allows neighborhoods to be calculated in roughly log(n_samples) time. If not set, no PCA is performed." />
+<!-- solver: forwarded to sc.external.pp.magic as solver='...' by the configfile -->
+<param name="solver" type="select" label="Which solver to use" help="'approximate' is faster but performs the imputation in the PCA space before projecting back to the gene space">
+<option value="exact">"exact", the implementation described in van Dijk et al. (2018)</option>
+<option value="approximate">"approximate", a faster solver that performs imputation in the PCA space and then projects back to the gene space</option>
+</param>
+<param name="knn_dist" type="select" label="Distance metric to use for the data" help="See scipy.spatial.distance.pdist documentation for more options https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html">
+<expand macro="distance_metric_options"/>
+</param>
+<expand macro="param_random_state"/>
+</when>
 </conditional>
 <expand macro="inputs_common_advanced"/>
 </inputs>
 <outputs>
 <expand macro="anndata_outputs"/>
 </outputs>
 <tests>
-<test>
+<test expect_num_outputs="2">
-<!-- test 0 -->
+<!-- test 1 -->
 <param name="adata" value="krumsiek11.h5ad" />
 <conditional name="method">
 <param name="method" value="pp.normalize_total"/>
 <conditional name="exclude_highly_expressed">
 <param name="exclude_highly_expressed" value="False"/>
 <has_text_matching expression="layers='all'"/>
 </assert_contents>
 </output>
 <output name="anndata_out" file="pp.normalize_total.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
 </test>
-<test>
+<test expect_num_outputs="2">
-<!-- test 1 -->
+<!-- test 2 -->
 <param name="adata" value="random-randint.h5ad"/>
 <conditional name="method">
 <param name="method" value="pp.recipe_zheng17"/>
 <param name="n_top_genes" value="1000"/>
 <param name="log" value="True"/>
 <has_text_matching expression="log=True"/>
 </assert_contents>
 </output>
 <output name="anndata_out" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.15"/>
 </test>
-<test>
+<test expect_num_outputs="2">
-<!-- test 2 -->
+<!-- test 3 -->
 <param name="adata" value="paul15_subsample.h5ad" />
 <conditional name="method">
 <param name="method" value="pp.recipe_weinreb17"/>
 <param name="log" value="True"/>
 <param name="mean_threshold" value="0.01"/>
 <has_text_matching expression="random_state=0"/>
 </assert_contents>
 </output>
 <output name="anndata_out" file="pp.recipe_weinreb17.paul15_subsample.updated.h5ad" ftype="h5ad" compare="sim_size"/>
 </test>
-<test>
+<test expect_num_outputs="2">
-<!-- test 3 -->
+<!-- test 4 -->
 <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" />
 <conditional name="method">
 <param name="method" value="pp.recipe_seurat"/>
 <param name="log" value="True"/>
 </conditional>
 <has_text_matching expression="sc.pp.recipe_seurat"/>
 <has_text_matching expression="log=True"/>
 </assert_contents>
 </output>
 <output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.25"/>
+</test>
+<test expect_num_outputs="2">
+<!-- test 5: MAGIC with name_list=all_genes; the input t=-1 must show up as t='auto' in hidden_output -->
+<param name="adata" value="krumsiek11.h5ad" />
+<conditional name="method">
+<param name="method" value="external.pp.magic"/>
+<param name="name_list" value="all_genes"/>
+<param name="t" value="-1"/>
+<param name="n_pca" value="5"/>
+</conditional>
+<section name="advanced_common">
+<param name="show_log" value="true" />
+</section>
+<output name="hidden_output">
+<assert_contents>
+<has_text_matching expression="external.pp.magic"/>
+<has_text_matching expression="name_list='all_genes'"/>
+<has_text_matching expression="t='auto'"/>
+<has_text_matching expression="n_pca=5"/>
+</assert_contents>
+</output>
+<output name="anndata_out" file="external.pp.magic.all_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+</test>
+<test expect_num_outputs="2">
+<!-- test 6: MAGIC with name_list=pca_only and an explicit t=3; stdout is expected to contain X_magic -->
+<param name="adata" value="krumsiek11.h5ad" />
+<conditional name="method">
+<param name="method" value="external.pp.magic"/>
+<param name="name_list" value="pca_only"/>
+<param name="t" value="3"/>
+<param name="n_pca" value="5"/>
+</conditional>
+<section name="advanced_common">
+<param name="show_log" value="true" />
+</section>
+<output name="hidden_output">
+<assert_contents>
+<has_text_matching expression="external.pp.magic"/>
+<has_text_matching expression="name_list='pca_only'"/>
+<has_text_matching expression="t=3"/>
+<has_text_matching expression="n_pca=5"/>
+</assert_contents>
+</output>
+<output name="anndata_out" file="external.pp.magic.pca_only.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+<assert_stdout>
+<has_text text="X_magic"/>
+</assert_stdout>
 </test>
 </tests>
 <help><![CDATA[
 Normalize total counts per cell (`pp.normalize_per_cell`)
 =========================================================
 the same total count after normalization.
 Similar functions are used, for example, by Seurat, Cell Ranger or SPRING.
 More details on the `scanpy documentation
-<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.normalize_per_cell.html>`__
+<https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.normalize_per_cell.html>`__
 Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`)
 ==================================================================================================================
 - renormalize after filtering
 - log transform (if needed)
 - scale to unit variance and shift to zero mean
 More details on the `scanpy documentation
-<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_zheng17.html>`__
+<https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.recipe_zheng17.html>`__
 Normalization and filtering as of Weinreb et al (2017) (`pp.recipe_weinreb17`)
 ==============================================================================
 Expects non-logarithmized data. If using logarithmized data, pass `log=False`.
 More details on the `scanpy documentation
-<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_weinreb17.html>`__
+<https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.recipe_weinreb17.html>`__
 Normalization and filtering as of Seurat et al (2015) (`pp.recipe_seurat`)
 ==========================================================================
 This uses a particular preprocessing.
 Expects non-logarithmized data. If using logarithmized data, pass `log=False`.
 More details on the `scanpy documentation
-<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_seurat.html>`__
+<https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.recipe_seurat.html>`__
+Markov Affinity-based Graph Imputation of Cells (MAGIC) as of Van Dijk D et al. (2018) (`external.pp.magic`)
+============================================================================================================
+MAGIC is an algorithm for denoising and transcript recovery of single cells applied to single-cell sequencing data. MAGIC builds a graph from the data and uses diffusion to smooth out noise and recover the data manifold.
+The algorithm implemented here has changed primarily in two ways compared to the algorithm described in Van Dijk D et al. (2018).
+- Firstly, we use the adaptive kernel described in Moon et al. (2019) for improved stability.
+- Secondly, data diffusion is applied in the PCA space, rather than the data space, for speed and memory improvements.
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/stable/api/scanpy.external.pp.magic.html>`__
 ]]></help>
 <expand macro="citations"/>
 </tool>

Mercurial > repos > iuc > scanpy_normalize

comparison normalize.xml @ 12:0ac2f7d40040 draft