comparison normalize.xml @ 12:0ac2f7d40040 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit c21958f44b81d740191999fb6015d5ae69538ee0
author iuc
date Wed, 31 Jul 2024 18:08:37 +0000
parents 51f9a8b21134
children 381401225cbc
comparison
equal deleted inserted replaced
11:51f9a8b21134 12:0ac2f7d40040
1 <tool id="scanpy_normalize" name="Normalize" version="@galaxy_version@" profile="@profile@"> 1 <tool id="scanpy_normalize" name="Normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">
2 <description>with scanpy</description> 2 <description>and impute with scanpy</description>
3 <expand macro="bio_tools"/>
4 <macros> 3 <macros>
5 <import>macros.xml</import> 4 <import>macros.xml</import>
6 </macros> 5 </macros>
6 <expand macro="bio_tools"/>
7 <expand macro="requirements"/> 7 <expand macro="requirements"/>
8 <expand macro="version_command"/> 8 <expand macro="version_command"/>
9 <command detect_errors="exit_code"><![CDATA[ 9 <command detect_errors="exit_code"><![CDATA[
10 @CMD@ 10 @CMD@
11 ]]></command> 11 ]]></command>
15 @CMD_read_inputs@ 15 @CMD_read_inputs@
16 16
17 #if $method.method == "pp.normalize_total" 17 #if $method.method == "pp.normalize_total"
18 sc.pp.normalize_total( 18 sc.pp.normalize_total(
19 adata, 19 adata,
20 #if str($method.target_sum)!= '' 20 #if str($method.target_sum) != ''
21 target_sum=$method.target_sum, 21 target_sum=$method.target_sum,
22 #end if 22 #end if
23 exclude_highly_expressed=$method.exclude_highly_expressed.exclude_highly_expressed, 23 exclude_highly_expressed=$method.exclude_highly_expressed.exclude_highly_expressed,
24 #if $method.exclude_highly_expressed.exclude_highly_expressed == "True" 24 #if $method.exclude_highly_expressed.exclude_highly_expressed == "True"
25 max_fraction=$method.exclude_highly_expressed.max_fraction, 25 max_fraction=$method.exclude_highly_expressed.max_fraction,
26 #end if 26 #end if
27 #if str($method.key_added) != '' 27 #if $method.key_added
28 key_added='$method.key_added', 28 key_added='$method.key_added',
29 #end if 29 #end if
30 #if str($method.layers) != '' 30 #if $method.layers
31 #if str($method.layers) != 'all' 31 #if str($method.layers) != 'all'
32 layers[str(x.strip()) for x in str($method.layers).split(',')], 32 layers[str(x.strip()) for x in str($method.layers).split(',')],
33 #else 33 #else
34 layers='$method.layers', 34 layers='$method.layers',
35 #end if 35 #end if
63 adata=adata, 63 adata=adata,
64 log=$method.log, 64 log=$method.log,
65 plot=False, 65 plot=False,
66 copy=False) 66 copy=False)
67 67
68 #else if $method.method == "external.pp.magic"
69 sc.external.pp.magic(
70 adata=adata,
71 name_list='$method.name_list',
72 knn=$method.knn,
73 #if str($method.decay) != ''
74 decay=$method.decay,
75 #end if
76 #if str($method.knn_max) != ''
77 knn_max=$method.knn_max,
78 #end if
79 #if $method.t == -1
80 t='auto',
81 #else
82 t=$method.t,
83 #end if
84 #if str($method.n_pca) != ''
85 n_pca=$method.n_pca,
86 #end if
87 solver='$method.solver',
88 knn_dist='$method.knn_dist',
89 random_state=$method.random_state,
90 copy=False)
68 #end if 91 #end if
69 92
70 @CMD_anndata_write_outputs@ 93 @CMD_anndata_write_outputs@
71 94
72 ]]></configfile> 95 ]]></configfile>
77 <param argument="method" type="select" label="Method used for normalization"> 100 <param argument="method" type="select" label="Method used for normalization">
78 <option value="pp.normalize_total">Normalize counts per cell, using 'pp.normalize_total'</option> 101 <option value="pp.normalize_total">Normalize counts per cell, using 'pp.normalize_total'</option>
79 <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using 'pp.recipe_zheng17'</option> 102 <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using 'pp.recipe_zheng17'</option>
80 <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using 'pp.recipe_weinreb17'</option> 103 <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using 'pp.recipe_weinreb17'</option>
81 <option value="pp.recipe_seurat">Normalization and filtering as of Seurat et al (2015), using 'pp.recipe_seurat'</option> 104 <option value="pp.recipe_seurat">Normalization and filtering as of Seurat et al (2015), using 'pp.recipe_seurat'</option>
105 <option value="external.pp.magic">Denoising using Markov Affinity-based Graph Imputation of Cells (MAGIC) API 'external.pp.magic'</option>
82 </param> 106 </param>
83 <when value="pp.normalize_total"> 107 <when value="pp.normalize_total">
84 <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/> 108 <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/>
85 <conditional name="exclude_highly_expressed"> 109 <conditional name="exclude_highly_expressed">
86 <param argument="exclude_highly_expressed" type="select" label="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell" help=" A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to target_sum"> 110 <param argument="exclude_highly_expressed" type="select" label="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell" help=" A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to target_sum">
117 <expand macro="pca_random_state"/> 141 <expand macro="pca_random_state"/>
118 </when> 142 </when>
119 <when value="pp.recipe_seurat"> 143 <when value="pp.recipe_seurat">
120 <expand macro="param_log"/> 144 <expand macro="param_log"/>
121 </when> 145 </when>
146 <when value="external.pp.magic">
147 <param name="name_list" type="select" label="Denoised genes to return" help="Selecting all genes may require a large amount of memory">
148 <option value="all_genes">All genes</option>
149 <option value="pca_only">PCA only</option>
150 </param>
151 <param argument="knn" type="integer" min="1" value="5" label="Number of nearest neighbors on which to build kernel" help=""/>
152 <param argument="decay" type="integer" optional="true" value="1" label="Set decay rate of kernel tails"
153 help="If not set, alpha decaying kernel is not used" />
154 <param argument="knn_max" type="integer" min="1" optional="true" value="" label="Maximum number of nearest neighbors with nonzero connection"
155 help="If not set, will be set to 3 * knn" />
156 <param argument="t" type="integer" min="-1" value="3" label="Power to which the diffusion operator is powered. This sets the level of diffusion"
157 help="If ‘-1’, this parameter is selected according to the Procrustes disparity of the diffused data." />
158 <param argument="n_pca" type="integer" value="100" optional="true" label="Number of principal components to use for calculating neighborhoods"
159 help="For extremely large datasets, using n_pca less than 20 allows neighborhoods to be calculated in roughly log(n_samples) time. If not set, no PCA is performed." />
160 <param name="solver" type="select" label="Which solver to use" help="Selecting all genes may require a large amount of memory">
161 <option value="exact">"exact", the implementation described in van Dijk et al. (2018) </option>
162 <option value="approximate">"approximate", a faster method that performs imputation in the PCA space and then projects back to the gene space</option>
163 </param>
164 <param name="knn_dist" type="select" label="Distance metric to use for the data" help="See scipy.spatial.distance.pdist documentation for more options https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html">
165 <expand macro="distance_metric_options"/>
166 </param>
167 <expand macro="param_random_state"/>
168 </when>
122 </conditional> 169 </conditional>
123 <expand macro="inputs_common_advanced"/> 170 <expand macro="inputs_common_advanced"/>
124 </inputs> 171 </inputs>
125 <outputs> 172 <outputs>
126 <expand macro="anndata_outputs"/> 173 <expand macro="anndata_outputs"/>
127 </outputs> 174 </outputs>
128 <tests> 175 <tests>
129 <test> 176 <test expect_num_outputs="2">
130 <!-- test 0 --> 177 <!-- test 1 -->
131 <param name="adata" value="krumsiek11.h5ad" /> 178 <param name="adata" value="krumsiek11.h5ad" />
132 <conditional name="method"> 179 <conditional name="method">
133 <param name="method" value="pp.normalize_total"/> 180 <param name="method" value="pp.normalize_total"/>
134 <conditional name="exclude_highly_expressed"> 181 <conditional name="exclude_highly_expressed">
135 <param name="exclude_highly_expressed" value="False"/> 182 <param name="exclude_highly_expressed" value="False"/>
149 <has_text_matching expression="layers='all'"/> 196 <has_text_matching expression="layers='all'"/>
150 </assert_contents> 197 </assert_contents>
151 </output> 198 </output>
152 <output name="anndata_out" file="pp.normalize_total.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> 199 <output name="anndata_out" file="pp.normalize_total.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
153 </test> 200 </test>
154 <test> 201 <test expect_num_outputs="2">
155 <!-- test 1 --> 202 <!-- test 2 -->
156 <param name="adata" value="random-randint.h5ad"/> 203 <param name="adata" value="random-randint.h5ad"/>
157 <conditional name="method"> 204 <conditional name="method">
158 <param name="method" value="pp.recipe_zheng17"/> 205 <param name="method" value="pp.recipe_zheng17"/>
159 <param name="n_top_genes" value="1000"/> 206 <param name="n_top_genes" value="1000"/>
160 <param name="log" value="True"/> 207 <param name="log" value="True"/>
169 <has_text_matching expression="log=True"/> 216 <has_text_matching expression="log=True"/>
170 </assert_contents> 217 </assert_contents>
171 </output> 218 </output>
172 <output name="anndata_out" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.15"/> 219 <output name="anndata_out" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.15"/>
173 </test> 220 </test>
174 <test> 221 <test expect_num_outputs="2">
175 <!-- test 2 --> 222 <!-- test 3 -->
176 <param name="adata" value="paul15_subsample.h5ad" /> 223 <param name="adata" value="paul15_subsample.h5ad" />
177 <conditional name="method"> 224 <conditional name="method">
178 <param name="method" value="pp.recipe_weinreb17"/> 225 <param name="method" value="pp.recipe_weinreb17"/>
179 <param name="log" value="True"/> 226 <param name="log" value="True"/>
180 <param name="mean_threshold" value="0.01"/> 227 <param name="mean_threshold" value="0.01"/>
197 <has_text_matching expression="random_state=0"/> 244 <has_text_matching expression="random_state=0"/>
198 </assert_contents> 245 </assert_contents>
199 </output> 246 </output>
200 <output name="anndata_out" file="pp.recipe_weinreb17.paul15_subsample.updated.h5ad" ftype="h5ad" compare="sim_size"/> 247 <output name="anndata_out" file="pp.recipe_weinreb17.paul15_subsample.updated.h5ad" ftype="h5ad" compare="sim_size"/>
201 </test> 248 </test>
202 <test> 249 <test expect_num_outputs="2">
203 <!-- test 3 --> 250 <!-- test 4 -->
204 <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" /> 251 <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" />
205 <conditional name="method"> 252 <conditional name="method">
206 <param name="method" value="pp.recipe_seurat"/> 253 <param name="method" value="pp.recipe_seurat"/>
207 <param name="log" value="True"/> 254 <param name="log" value="True"/>
208 </conditional> 255 </conditional>
214 <has_text_matching expression="sc.pp.recipe_seurat"/> 261 <has_text_matching expression="sc.pp.recipe_seurat"/>
215 <has_text_matching expression="log=True"/> 262 <has_text_matching expression="log=True"/>
216 </assert_contents> 263 </assert_contents>
217 </output> 264 </output>
218 <output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.25"/> 265 <output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.25"/>
266 </test>
267 <test expect_num_outputs="2">
268 <!-- test 5 -->
269 <param name="adata" value="krumsiek11.h5ad" />
270 <conditional name="method">
271 <param name="method" value="external.pp.magic"/>
272 <param name="name_list" value="all_genes"/>
273 <param name="t" value="-1"/>
274 <param name="n_pca" value="5"/>
275 </conditional>
276 <section name="advanced_common">
277 <param name="show_log" value="true" />
278 </section>
279 <output name="hidden_output">
280 <assert_contents>
281 <has_text_matching expression="external.pp.magic"/>
282 <has_text_matching expression="name_list='all_genes'"/>
283 <has_text_matching expression="t='auto'"/>
284 <has_text_matching expression="n_pca=5"/>
285 </assert_contents>
286 </output>
287 <output name="anndata_out" file="external.pp.magic.all_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
288 </test>
289 <test expect_num_outputs="2">
290 <!-- test 6 -->
291 <param name="adata" value="krumsiek11.h5ad" />
292 <conditional name="method">
293 <param name="method" value="external.pp.magic"/>
294 <param name="name_list" value="pca_only"/>
295 <param name="t" value="3"/>
296 <param name="n_pca" value="5"/>
297 </conditional>
298 <section name="advanced_common">
299 <param name="show_log" value="true" />
300 </section>
301 <output name="hidden_output">
302 <assert_contents>
303 <has_text_matching expression="external.pp.magic"/>
304 <has_text_matching expression="name_list='pca_only'"/>
305 <has_text_matching expression="t=3"/>
306 <has_text_matching expression="n_pca=5"/>
307 </assert_contents>
308 </output>
309 <output name="anndata_out" file="external.pp.magic.pca_only.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
310 <assert_stdout>
311 <has_text text="X_magic"/>
312 </assert_stdout>
219 </test> 313 </test>
220 </tests> 314 </tests>
221 <help><![CDATA[ 315 <help><![CDATA[
222 Normalize total counts per cell (`pp.normalize_per_cell`) 316 Normalize total counts per cell (`pp.normalize_per_cell`)
223 ========================================================= 317 =========================================================
226 the same total count after normalization. 320 the same total count after normalization.
227 321
228 Similar functions are used, for example, by Seurat, Cell Ranger or SPRING. 322 Similar functions are used, for example, by Seurat, Cell Ranger or SPRING.
229 323
230 More details on the `scanpy documentation 324 More details on the `scanpy documentation
231 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.normalize_per_cell.html>`__ 325 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.normalize_per_cell.html>`__
232 326
233 327
234 Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`) 328 Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`)
235 ================================================================================================================== 329 ==================================================================================================================
236 330
245 - renormalize after filtering 339 - renormalize after filtering
246 - log transform (if needed) 340 - log transform (if needed)
247 - scale to unit variance and shift to zero mean 341 - scale to unit variance and shift to zero mean
248 342
249 More details on the `scanpy documentation 343 More details on the `scanpy documentation
250 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_zheng17.html>`__ 344 <https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.recipe_zheng17.html>`__
251 345
252 346
253 Normalization and filtering as of Weinreb et al (2017) (`pp.recipe_weinreb17`) 347 Normalization and filtering as of Weinreb et al (2017) (`pp.recipe_weinreb17`)
254 ============================================================================== 348 ==============================================================================
255 349
256 Expects non-logarithmized data. If using logarithmized data, pass `log=False`. 350 Expects non-logarithmized data. If using logarithmized data, pass `log=False`.
257 351
258 More details on the `scanpy documentation 352 More details on the `scanpy documentation
259 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_weinreb17.html>`__ 353 <https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.recipe_weinreb17.html>`__
260 354
261 355
262 Normalization and filtering as of Seurat et al (2015) (`pp.recipe_seurat`) 356 Normalization and filtering as of Seurat et al (2015) (`pp.recipe_seurat`)
263 ========================================================================== 357 ==========================================================================
264 358
265 This uses a particular preprocessing. 359 This uses a particular preprocessing.
266 360
267 Expects non-logarithmized data. If using logarithmized data, pass `log=False`. 361 Expects non-logarithmized data. If using logarithmized data, pass `log=False`.
268 362
269 More details on the `scanpy documentation 363 More details on the `scanpy documentation
270 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_seurat.html>`__ 364 <https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.recipe_seurat.html>`__
365
366
367 Markov Affinity-based Graph Imputation of Cells (MAGIC) as of Van Dijk D et al. (2018) (`external.pp.magic`)
368 ============================================================================================================
369
370 MAGIC is an algorithm for denoising and transcript recovery of single cells applied to single-cell sequencing data. MAGIC builds a graph from the data and uses diffusion to smooth out noise and recover the data manifold.
371
372 The algorithm implemented here has changed primarily in two ways compared to the algorithm described in Van Dijk D et al. (2018).
373
374 - Firstly, we use the adaptive kernel described in Moon et al. (2019) for improved stability.
375 - Secondly, data diffusion is applied in the PCA space, rather than the data space, for speed and memory improvements.
376
377 More details on the `scanpy documentation
378 <https://scanpy.readthedocs.io/en/stable/api/scanpy.external.pp.magic.html>`__
271 379
272 ]]></help> 380 ]]></help>
273 <expand macro="citations"/> 381 <expand macro="citations"/>
274 </tool> 382 </tool>