Mercurial > repos > iuc > scanpy_normalize
comparison normalize.xml @ 12:0ac2f7d40040 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit c21958f44b81d740191999fb6015d5ae69538ee0
| author | iuc |
|---|---|
| date | Wed, 31 Jul 2024 18:08:37 +0000 |
| parents | 51f9a8b21134 |
| children | 381401225cbc |
comparison
equal
deleted
inserted
replaced
| 11:51f9a8b21134 | 12:0ac2f7d40040 |
|---|---|
| 1 <tool id="scanpy_normalize" name="Normalize" version="@galaxy_version@" profile="@profile@"> | 1 <tool id="scanpy_normalize" name="Normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@"> |
| 2 <description>with scanpy</description> | 2 <description>and impute with scanpy</description> |
| 3 <expand macro="bio_tools"/> | |
| 4 <macros> | 3 <macros> |
| 5 <import>macros.xml</import> | 4 <import>macros.xml</import> |
| 6 </macros> | 5 </macros> |
| 6 <expand macro="bio_tools"/> | |
| 7 <expand macro="requirements"/> | 7 <expand macro="requirements"/> |
| 8 <expand macro="version_command"/> | 8 <expand macro="version_command"/> |
| 9 <command detect_errors="exit_code"><![CDATA[ | 9 <command detect_errors="exit_code"><![CDATA[ |
| 10 @CMD@ | 10 @CMD@ |
| 11 ]]></command> | 11 ]]></command> |
| 15 @CMD_read_inputs@ | 15 @CMD_read_inputs@ |
| 16 | 16 |
| 17 #if $method.method == "pp.normalize_total" | 17 #if $method.method == "pp.normalize_total" |
| 18 sc.pp.normalize_total( | 18 sc.pp.normalize_total( |
| 19 adata, | 19 adata, |
| 20 #if str($method.target_sum)!= '' | 20 #if str($method.target_sum) != '' |
| 21 target_sum=$method.target_sum, | 21 target_sum=$method.target_sum, |
| 22 #end if | 22 #end if |
| 23 exclude_highly_expressed=$method.exclude_highly_expressed.exclude_highly_expressed, | 23 exclude_highly_expressed=$method.exclude_highly_expressed.exclude_highly_expressed, |
| 24 #if $method.exclude_highly_expressed.exclude_highly_expressed == "True" | 24 #if $method.exclude_highly_expressed.exclude_highly_expressed == "True" |
| 25 max_fraction=$method.exclude_highly_expressed.max_fraction, | 25 max_fraction=$method.exclude_highly_expressed.max_fraction, |
| 26 #end if | 26 #end if |
| 27 #if str($method.key_added) != '' | 27 #if $method.key_added |
| 28 key_added='$method.key_added', | 28 key_added='$method.key_added', |
| 29 #end if | 29 #end if |
| 30 #if str($method.layers) != '' | 30 #if $method.layers |
| 31 #if str($method.layers) != 'all' | 31 #if str($method.layers) != 'all' |
| 32 layers[str(x.strip()) for x in str($method.layers).split(',')], | 32 layers[str(x.strip()) for x in str($method.layers).split(',')], |
| 33 #else | 33 #else |
| 34 layers='$method.layers', | 34 layers='$method.layers', |
| 35 #end if | 35 #end if |
| 63 adata=adata, | 63 adata=adata, |
| 64 log=$method.log, | 64 log=$method.log, |
| 65 plot=False, | 65 plot=False, |
| 66 copy=False) | 66 copy=False) |
| 67 | 67 |
| 68 #else if $method.method == "external.pp.magic" | |
| 69 sc.external.pp.magic( | |
| 70 adata=adata, | |
| 71 name_list='$method.name_list', | |
| 72 knn=$method.knn, | |
| 73 #if str($method.decay) != '' | |
| 74 decay=$method.decay, | |
| 75 #end if | |
| 76 #if str($method.knn_max) != '' | |
| 77 knn_max=$method.knn_max, | |
| 78 #end if | |
| 79 #if $method.t == -1 | |
| 80 t='auto', | |
| 81 #else | |
| 82 t=$method.t, | |
| 83 #end if | |
| 84 #if str($method.n_pca) != '' | |
| 85 n_pca=$method.n_pca, | |
| 86 #end if | |
| 87 solver='$method.solver', | |
| 88 knn_dist='$method.knn_dist', | |
| 89 random_state=$method.random_state, | |
| 90 copy=False) | |
| 68 #end if | 91 #end if |
| 69 | 92 |
| 70 @CMD_anndata_write_outputs@ | 93 @CMD_anndata_write_outputs@ |
| 71 | 94 |
| 72 ]]></configfile> | 95 ]]></configfile> |
| 77 <param argument="method" type="select" label="Method used for normalization"> | 100 <param argument="method" type="select" label="Method used for normalization"> |
| 78 <option value="pp.normalize_total">Normalize counts per cell, using 'pp.normalize_total'</option> | 101 <option value="pp.normalize_total">Normalize counts per cell, using 'pp.normalize_total'</option> |
| 79 <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using 'pp.recipe_zheng17'</option> | 102 <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using 'pp.recipe_zheng17'</option> |
| 80 <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using 'pp.recipe_weinreb17'</option> | 103 <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using 'pp.recipe_weinreb17'</option> |
| 81 <option value="pp.recipe_seurat">Normalization and filtering as of Seurat et al (2015), using 'pp.recipe_seurat'</option> | 104 <option value="pp.recipe_seurat">Normalization and filtering as of Seurat et al (2015), using 'pp.recipe_seurat'</option> |
| 105 <option value="external.pp.magic">Denoising using Markov Affinity-based Graph Imputation of Cells (MAGIC) API 'external.pp.magic'</option> | |
| 82 </param> | 106 </param> |
| 83 <when value="pp.normalize_total"> | 107 <when value="pp.normalize_total"> |
| 84 <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/> | 108 <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/> |
| 85 <conditional name="exclude_highly_expressed"> | 109 <conditional name="exclude_highly_expressed"> |
| 86 <param argument="exclude_highly_expressed" type="select" label="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell" help=" A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to target_sum"> | 110 <param argument="exclude_highly_expressed" type="select" label="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell" help=" A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to target_sum"> |
| 117 <expand macro="pca_random_state"/> | 141 <expand macro="pca_random_state"/> |
| 118 </when> | 142 </when> |
| 119 <when value="pp.recipe_seurat"> | 143 <when value="pp.recipe_seurat"> |
| 120 <expand macro="param_log"/> | 144 <expand macro="param_log"/> |
| 121 </when> | 145 </when> |
| 146 <when value="external.pp.magic"> | |
| 147 <param name="name_list" type="select" label="Denoised genes to return" help="Selecting all genes may require a large amount of memory"> | |
| 148 <option value="all_genes">All genes</option> | |
| 149 <option value="pca_only">PCA only</option> | |
| 150 </param> | |
| 151 <param argument="knn" type="integer" min="1" value="5" label="Number of nearest neighbors on which to build kernel" help=""/> | |
| 152 <param argument="decay" type="integer" optional="true" value="1" label="Set decay rate of kernel tails" | |
| 153 help="If not set, alpha decaying kernel is not used" /> | |
| 154 <param argument="knn_max" type="integer" min="1" optional="true" value="" label="Maximum number of nearest neighbors with nonzero connection" | |
| 155 help="If not set, will be set to 3 * knn" /> | |
| 156 <param argument="t" type="integer" min="-1" value="3" label="Power to which the diffusion operator is powered. This sets the level of diffusion" | |
| 157 help="If ‘-1’, this parameter is selected according to the Procrustes disparity of the diffused data." /> | |
| 158 <param argument="n_pca" type="integer" value="100" optional="true" label="Number of principal components to use for calculating neighborhoods" | |
| 159 help="For extremely large datasets, using n_pca less than 20 allows neighborhoods to be calculated in roughly log(n_samples) time. If not set, no PCA is performed." /> | |
| 160 <param name="solver" type="select" label="Which solver to use" help="Selecting all genes may require a large amount of memory"> | |
| 161 <option value="exact">"exact", the implementation described in van Dijk et al. (2018) </option> | |
| 162 <option value="approximate">"approximate", a faster solver that performs imputation in the PCA space and then projects back to the gene space</option> | |
| 163 </param> | |
| 164 <param name="knn_dist" type="select" label="Distance metric to use for the data" help="See scipy.spatial.distance.pdist documentation for more options https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html"> | |
| 165 <expand macro="distance_metric_options"/> | |
| 166 </param> | |
| 167 <expand macro="param_random_state"/> | |
| 168 </when> | |
| 122 </conditional> | 169 </conditional> |
| 123 <expand macro="inputs_common_advanced"/> | 170 <expand macro="inputs_common_advanced"/> |
| 124 </inputs> | 171 </inputs> |
| 125 <outputs> | 172 <outputs> |
| 126 <expand macro="anndata_outputs"/> | 173 <expand macro="anndata_outputs"/> |
| 127 </outputs> | 174 </outputs> |
| 128 <tests> | 175 <tests> |
| 129 <test> | 176 <test expect_num_outputs="2"> |
| 130 <!-- test 0 --> | 177 <!-- test 1 --> |
| 131 <param name="adata" value="krumsiek11.h5ad" /> | 178 <param name="adata" value="krumsiek11.h5ad" /> |
| 132 <conditional name="method"> | 179 <conditional name="method"> |
| 133 <param name="method" value="pp.normalize_total"/> | 180 <param name="method" value="pp.normalize_total"/> |
| 134 <conditional name="exclude_highly_expressed"> | 181 <conditional name="exclude_highly_expressed"> |
| 135 <param name="exclude_highly_expressed" value="False"/> | 182 <param name="exclude_highly_expressed" value="False"/> |
| 149 <has_text_matching expression="layers='all'"/> | 196 <has_text_matching expression="layers='all'"/> |
| 150 </assert_contents> | 197 </assert_contents> |
| 151 </output> | 198 </output> |
| 152 <output name="anndata_out" file="pp.normalize_total.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | 199 <output name="anndata_out" file="pp.normalize_total.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> |
| 153 </test> | 200 </test> |
| 154 <test> | 201 <test expect_num_outputs="2"> |
| 155 <!-- test 1 --> | 202 <!-- test 2 --> |
| 156 <param name="adata" value="random-randint.h5ad"/> | 203 <param name="adata" value="random-randint.h5ad"/> |
| 157 <conditional name="method"> | 204 <conditional name="method"> |
| 158 <param name="method" value="pp.recipe_zheng17"/> | 205 <param name="method" value="pp.recipe_zheng17"/> |
| 159 <param name="n_top_genes" value="1000"/> | 206 <param name="n_top_genes" value="1000"/> |
| 160 <param name="log" value="True"/> | 207 <param name="log" value="True"/> |
| 169 <has_text_matching expression="log=True"/> | 216 <has_text_matching expression="log=True"/> |
| 170 </assert_contents> | 217 </assert_contents> |
| 171 </output> | 218 </output> |
| 172 <output name="anndata_out" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.15"/> | 219 <output name="anndata_out" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.15"/> |
| 173 </test> | 220 </test> |
| 174 <test> | 221 <test expect_num_outputs="2"> |
| 175 <!-- test 2 --> | 222 <!-- test 3 --> |
| 176 <param name="adata" value="paul15_subsample.h5ad" /> | 223 <param name="adata" value="paul15_subsample.h5ad" /> |
| 177 <conditional name="method"> | 224 <conditional name="method"> |
| 178 <param name="method" value="pp.recipe_weinreb17"/> | 225 <param name="method" value="pp.recipe_weinreb17"/> |
| 179 <param name="log" value="True"/> | 226 <param name="log" value="True"/> |
| 180 <param name="mean_threshold" value="0.01"/> | 227 <param name="mean_threshold" value="0.01"/> |
| 197 <has_text_matching expression="random_state=0"/> | 244 <has_text_matching expression="random_state=0"/> |
| 198 </assert_contents> | 245 </assert_contents> |
| 199 </output> | 246 </output> |
| 200 <output name="anndata_out" file="pp.recipe_weinreb17.paul15_subsample.updated.h5ad" ftype="h5ad" compare="sim_size"/> | 247 <output name="anndata_out" file="pp.recipe_weinreb17.paul15_subsample.updated.h5ad" ftype="h5ad" compare="sim_size"/> |
| 201 </test> | 248 </test> |
| 202 <test> | 249 <test expect_num_outputs="2"> |
| 203 <!-- test 3 --> | 250 <!-- test 4 --> |
| 204 <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" /> | 251 <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" /> |
| 205 <conditional name="method"> | 252 <conditional name="method"> |
| 206 <param name="method" value="pp.recipe_seurat"/> | 253 <param name="method" value="pp.recipe_seurat"/> |
| 207 <param name="log" value="True"/> | 254 <param name="log" value="True"/> |
| 208 </conditional> | 255 </conditional> |
| 214 <has_text_matching expression="sc.pp.recipe_seurat"/> | 261 <has_text_matching expression="sc.pp.recipe_seurat"/> |
| 215 <has_text_matching expression="log=True"/> | 262 <has_text_matching expression="log=True"/> |
| 216 </assert_contents> | 263 </assert_contents> |
| 217 </output> | 264 </output> |
| 218 <output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.25"/> | 265 <output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.25"/> |
| 266 </test> | |
| 267 <test expect_num_outputs="2"> | |
| 268 <!-- test 5 --> | |
| 269 <param name="adata" value="krumsiek11.h5ad" /> | |
| 270 <conditional name="method"> | |
| 271 <param name="method" value="external.pp.magic"/> | |
| 272 <param name="name_list" value="all_genes"/> | |
| 273 <param name="t" value="-1"/> | |
| 274 <param name="n_pca" value="5"/> | |
| 275 </conditional> | |
| 276 <section name="advanced_common"> | |
| 277 <param name="show_log" value="true" /> | |
| 278 </section> | |
| 279 <output name="hidden_output"> | |
| 280 <assert_contents> | |
| 281 <has_text_matching expression="external.pp.magic"/> | |
| 282 <has_text_matching expression="name_list='all_genes'"/> | |
| 283 <has_text_matching expression="t='auto'"/> | |
| 284 <has_text_matching expression="n_pca=5"/> | |
| 285 </assert_contents> | |
| 286 </output> | |
| 287 <output name="anndata_out" file="external.pp.magic.all_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 288 </test> | |
| 289 <test expect_num_outputs="2"> | |
| 290 <!-- test 6 --> | |
| 291 <param name="adata" value="krumsiek11.h5ad" /> | |
| 292 <conditional name="method"> | |
| 293 <param name="method" value="external.pp.magic"/> | |
| 294 <param name="name_list" value="pca_only"/> | |
| 295 <param name="t" value="3"/> | |
| 296 <param name="n_pca" value="5"/> | |
| 297 </conditional> | |
| 298 <section name="advanced_common"> | |
| 299 <param name="show_log" value="true" /> | |
| 300 </section> | |
| 301 <output name="hidden_output"> | |
| 302 <assert_contents> | |
| 303 <has_text_matching expression="external.pp.magic"/> | |
| 304 <has_text_matching expression="name_list='pca_only'"/> | |
| 305 <has_text_matching expression="t=3"/> | |
| 306 <has_text_matching expression="n_pca=5"/> | |
| 307 </assert_contents> | |
| 308 </output> | |
| 309 <output name="anndata_out" file="external.pp.magic.pca_only.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 310 <assert_stdout> | |
| 311 <has_text text="X_magic"/> | |
| 312 </assert_stdout> | |
| 219 </test> | 313 </test> |
| 220 </tests> | 314 </tests> |
| 221 <help><![CDATA[ | 315 <help><![CDATA[ |
| 222 Normalize total counts per cell (`pp.normalize_total`) | 316 Normalize total counts per cell (`pp.normalize_total`) |
| 223 ====================================================== | 317 ====================================================== |
| 226 the same total count after normalization. | 320 the same total count after normalization. |
| 227 | 321 |
| 228 Similar functions are used, for example, by Seurat, Cell Ranger or SPRING. | 322 Similar functions are used, for example, by Seurat, Cell Ranger or SPRING. |
| 229 | 323 |
| 230 More details on the `scanpy documentation | 324 More details on the `scanpy documentation |
| 231 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.normalize_per_cell.html>`__ | 325 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.normalize_per_cell.html>`__ |
| 232 | 326 |
| 233 | 327 |
| 234 Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`) | 328 Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`) |
| 235 ================================================================================================================== | 329 ================================================================================================================== |
| 236 | 330 |
| 245 - renormalize after filtering | 339 - renormalize after filtering |
| 246 - log transform (if needed) | 340 - log transform (if needed) |
| 247 - scale to unit variance and shift to zero mean | 341 - scale to unit variance and shift to zero mean |
| 248 | 342 |
| 249 More details on the `scanpy documentation | 343 More details on the `scanpy documentation |
| 250 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_zheng17.html>`__ | 344 <https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.recipe_zheng17.html>`__ |
| 251 | 345 |
| 252 | 346 |
| 253 Normalization and filtering as of Weinreb et al (2017) (`pp.recipe_weinreb17`) | 347 Normalization and filtering as of Weinreb et al (2017) (`pp.recipe_weinreb17`) |
| 254 ============================================================================== | 348 ============================================================================== |
| 255 | 349 |
| 256 Expects non-logarithmized data. If using logarithmized data, pass `log=False`. | 350 Expects non-logarithmized data. If using logarithmized data, pass `log=False`. |
| 257 | 351 |
| 258 More details on the `scanpy documentation | 352 More details on the `scanpy documentation |
| 259 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_weinreb17.html>`__ | 353 <https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.recipe_weinreb17.html>`__ |
| 260 | 354 |
| 261 | 355 |
| 262 Normalization and filtering as of Seurat et al (2015) (`pp.recipe_seurat`) | 356 Normalization and filtering as of Seurat et al (2015) (`pp.recipe_seurat`) |
| 263 ========================================================================== | 357 ========================================================================== |
| 264 | 358 |
| 265 This uses a particular preprocessing. | 359 This uses a particular preprocessing. |
| 266 | 360 |
| 267 Expects non-logarithmized data. If using logarithmized data, pass `log=False`. | 361 Expects non-logarithmized data. If using logarithmized data, pass `log=False`. |
| 268 | 362 |
| 269 More details on the `scanpy documentation | 363 More details on the `scanpy documentation |
| 270 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_seurat.html>`__ | 364 <https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.recipe_seurat.html>`__ |
| 365 | |
| 366 | |
| 367 Markov Affinity-based Graph Imputation of Cells (MAGIC) as of Van Dijk D et al. (2018) (`external.pp.magic`) | |
| 368 ============================================================================================================ | |
| 369 | |
| 370 MAGIC is an algorithm for denoising and transcript recovery of single cells applied to single-cell sequencing data. MAGIC builds a graph from the data and uses diffusion to smooth out noise and recover the data manifold. | |
| 371 | |
| 372 The algorithm implemented here has changed primarily in two ways compared to the algorithm described in Van Dijk D et al. (2018). | |
| 373 | |
| 374 - Firstly, we use the adaptive kernel described in Moon et al. (2019) for improved stability. | |
| 375 - Secondly, data diffusion is applied in the PCA space, rather than the data space, for speed and memory improvements. | |
| 376 | |
| 377 More details on the `scanpy documentation | |
| 378 <https://scanpy.readthedocs.io/en/stable/api/scanpy.external.pp.magic.html>`__ | |
| 271 | 379 |
| 272 ]]></help> | 380 ]]></help> |
| 273 <expand macro="citations"/> | 381 <expand macro="citations"/> |
| 274 </tool> | 382 </tool> |
