Mercurial > repos > iuc > scanpy_normalize
comparison normalize.xml @ 0:61f924b5e618 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 92f85afaed0097d1879317a9f513093fce5481d6
| author | iuc |
|---|---|
| date | Mon, 04 Mar 2019 10:11:54 -0500 |
| parents | |
| children | 05e99dc40db1 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:61f924b5e618 |
|---|---|
| 1 <tool id="scanpy_normalize" name="Normalize with scanpy" version="@galaxy_version@"> | |
| 2 <description></description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements"/> | |
| 7 <expand macro="version_command"/> | |
| 8 <!-- Job command line. @CMD@ is a token that is not defined in this file (presumably supplied by macros.xml; TODO confirm). detect_errors is set to exit_code. --> <command detect_errors="exit_code"><![CDATA[ | |
| 9 @CMD@ | |
| 10 ]]></command> | |
| 11 <!-- Builds the Python script (script_file) that applies the scanpy preprocessing call chosen in the "method" conditional to adata. The optional floats counts_per_cell_after and max_value are compared against the empty string instead of being tested for truthiness, so an explicit 0 is forwarded to scanpy rather than being treated as unset. counts_per_cell is an optional dataset and keeps the plain presence check. @CMD_imports@, @CMD_read_inputs@ and @CMD_anndata_write_outputs@ are tokens defined outside this file. --> <configfiles> | |
| 12 <configfile name="script_file"><![CDATA[ | |
| 13 @CMD_imports@ | |
| 14 @CMD_read_inputs@ | |
| 15 | |
| 16 #if $method.method == "pp.normalize_per_cell" | |
| 17 sc.pp.normalize_per_cell( | |
| 18 data=adata, | |
| 19 #if str($method.counts_per_cell_after) != '' | |
| 20 counts_per_cell_after=$method.counts_per_cell_after, | |
| 21 #end if | |
| 22 #if $method.counts_per_cell | |
| 23 counts_per_cell=np.loadtxt('$method.counts_per_cell'), | |
| 24 #end if | |
| 25 key_n_counts='$method.key_n_counts', | |
| 26 copy=False) | |
| 27 adata.obs.to_csv('$anndata_obs', sep='\t') | |
| 28 #elif $method.method == "pp.recipe_zheng17" | |
| 29 sc.pp.recipe_zheng17( | |
| 30 adata=adata, | |
| 31 n_top_genes=$method.n_top_genes, | |
| 32 log=$method.log, | |
| 33 plot=False, | |
| 34 copy=False) | |
| 35 #elif $method.method == "pp.recipe_weinreb17" | |
| 36 sc.pp.recipe_weinreb17( | |
| 37 adata=adata, | |
| 38 log=$method.log, | |
| 39 mean_threshold=$method.mean_threshold, | |
| 40 cv_threshold=$method.cv_threshold, | |
| 41 n_pcs=$method.n_pcs, | |
| 42 svd_solver='$method.svd_solver', | |
| 43 random_state=$method.random_state, | |
| 44 copy=False) | |
| 45 #elif $method.method == "pp.recipe_seurat" | |
| 46 sc.pp.recipe_seurat( | |
| 47 adata=adata, | |
| 48 log=$method.log, | |
| 49 plot=False, | |
| 50 copy=False) | |
| 51 #elif $method.method == "pp.log1p" | |
| 52 sc.pp.log1p( | |
| 53 data=adata, | |
| 54 copy=False) | |
| 55 #elif $method.method == "pp.scale" | |
| 56 sc.pp.scale( | |
| 57 data=adata, | |
| 58 zero_center=$method.zero_center, | |
| 59 #if str($method.max_value) != '' | |
| 60 max_value=$method.max_value, | |
| 61 #end if | |
| 62 copy=False) | |
| 63 #elif $method.method == "pp.sqrt" | |
| 64 sc.pp.sqrt( | |
| 65 data=adata, | |
| 66 copy=False) | |
| 67 #elif $method.method == "pp.downsample_counts" | |
| 68 sc.pp.downsample_counts( | |
| 69 adata=adata, | |
| 70 target_counts=$method.target_counts, | |
| 71 random_state=$method.random_state, | |
| 72 copy=False) | |
| 73 #end if | |
| 74 | |
| 75 @CMD_anndata_write_outputs@ | |
| 76 | |
| 77 ]]></configfile> | |
| 78 </configfiles> | |
| 79 <!-- Tool inputs: the AnnData input (expanded from the inputs_anndata macro), a "method" conditional that selects the scanpy preprocessing function and exposes its parameters, and the output format selector (anndata_output_format macro). The macros are defined outside this file. --> <inputs> | |
| 80 <expand macro="inputs_anndata"/> | |
| 81 <conditional name="method"> | |
| 82 <param argument="method" type="select" label="Method used for normalization"> | |
| 83 <option value="pp.normalize_per_cell">Normalize total counts per cell, using `pp.normalize_per_cell`</option> | |
| 84 <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using `pp.recipe_zheng17`</option> | |
| 85 <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using `pp.recipe_weinreb17`</option> | |
| 86 <option value="pp.recipe_seurat">Normalization and filtering as of Satija et al (2015), using `pp.recipe_seurat`</option> | |
| 87 <option value="pp.log1p">Logarithmize the data matrix, using `pp.log1p`</option> | |
| 88 <option value="pp.scale">Scale data to unit variance and zero mean, using `pp.scale`</option> | |
| 89 <option value="pp.sqrt">Square root the data matrix, using `pp.sqrt`</option> | |
| 90 <option value="pp.downsample_counts">Downsample counts, using `pp.downsample_counts`</option> | |
| 91 </param> | |
| 92 <when value="pp.normalize_per_cell"> | |
| 93 <param argument="counts_per_cell_after" type="float" value="" optional="true" label="Counts per cell after" help="If not provided, after normalization, each cell has a total count equal to the median of the *counts_per_cell* before normalization."/> | |
| 94 <param argument="counts_per_cell" type="data" format="tabular,txt" optional="true" label="Precomputed counts per cell" help=""/> | |
| 95 <param argument="key_n_counts" type="text" value="n_counts" label="Name of the field in `adata.obs` where the total counts per cell will be stored" help=""/> | |
| 96 </when> | |
| 97 <when value="pp.recipe_zheng17"> | |
| 98 <param argument="n_top_genes" type="integer" min="0" value="1000" label="Number of genes to keep" help=""/> | |
| 99 <expand macro="param_log"/> | |
| 100 </when> | |
| 101 <when value="pp.recipe_weinreb17"> | |
| 102 <expand macro="param_log"/> | |
| 103 <param argument="mean_threshold" type="float" value="0.01" label="Mean threshold" help=""/> | |
| 104 <param argument="cv_threshold" type="float" value="2" label="CV threshold" help=""/> | |
| 105 <param argument="n_pcs" type="integer" min="0" value="50" label="Number of principal components" help=""/> | |
| 106 <expand macro="svd_solver"/> | |
| 107 <expand macro="pca_random_state"/> | |
| 108 </when> | |
| 109 <when value="pp.recipe_seurat"> | |
| 110 <expand macro="param_log"/> | |
| 111 </when> | |
| 112 <when value="pp.log1p"/> | |
| 113 <when value="pp.scale"> | |
| 114 <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true" | |
| 115 label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/> | |
| 116 <param argument="max_value" type="float" value="" optional="true" label="Maximum value" | |
| 117 help="Clip (truncate) to this value after scaling. If not set, it does not clip."/> | |
| 118 </when> | |
| 119 <when value="pp.sqrt"/> | |
| 120 <when value="pp.downsample_counts"> | |
| 121 <param argument="target_counts" type="integer" min="0" value="20000" | |
| 122 label="Target number of counts for downsampling" help="Cells with more counts than 'target_counts' will be downsampled to have 'target_counts' counts."/> | |
| 123 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling" help=""/> | |
| 124 </when> | |
| 125 </conditional> | |
| 126 <expand macro="anndata_output_format"/> | |
| 127 </inputs> | |
| 128 <!-- Outputs: the AnnData outputs come from the anndata_outputs macro (defined outside this file). anndata_obs is a tabular dataset whose filter keeps it only when the selected method is pp.normalize_per_cell, the one script branch that writes adata.obs to it. --> <outputs> | |
| 129 <expand macro="anndata_outputs"/> | |
| 130 <data name="anndata_obs" format="tabular" label="${tool.name} on ${on_string}: Annotation of observations"> | |
| 131 <filter>method['method'] == 'pp.normalize_per_cell'</filter> | |
| 132 </data> | |
| 133 </outputs> | |
| 134 <tests> | |
| 135 <!-- pp.normalize_per_cell on krumsiek11.h5ad with counts_per_cell_after=2 and a precomputed counts file. Asserts that stdout contains the expected call fragments, compares the h5ad output with sim_size, and compares the obs table against its reference file. --> <test> | |
| 136 <conditional name="input"> | |
| 137 <param name="format" value="h5ad" /> | |
| 138 <param name="adata" value="krumsiek11.h5ad" /> | |
| 139 </conditional> | |
| 140 <conditional name="method"> | |
| 141 <param name="method" value="pp.normalize_per_cell"/> | |
| 142 <param name="counts_per_cell_after" value="2"/> | |
| 143 <param name="counts_per_cell" value="krumsiek11_counts_per_cell"/> | |
| 144 <param name="key_n_counts" value="n_counts"/> | |
| 145 </conditional> | |
| 146 <param name="anndata_output_format" value="h5ad"/> | |
| 147 <assert_stdout> | |
| 148 <has_text_matching expression="sc.pp.normalize_per_cell"/> | |
| 149 <has_text_matching expression="counts_per_cell_after=2.0"/> | |
| 150 <has_text_matching expression="counts_per_cell=np.loadtxt"/> | |
| 151 <has_text_matching expression="key_n_counts='n_counts'"/> | |
| 152 </assert_stdout> | |
| 153 <output name="anndata_out_h5ad" file="pp.normalize_per_cell.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> | |
| 154 <output name="anndata_obs" file="pp.normalize_per_cell.obs.krumsiek11.tabular"/> | |
| 155 </test> | |
| 156 <!-- pp.recipe_zheng17 on random-randint.h5ad with n_top_genes=1000 and log=True. Asserts the call fragments in stdout and compares the h5ad output with sim_size. --> <test> | |
| 157 <conditional name="input"> | |
| 158 <param name="format" value="h5ad" /> | |
| 159 <param name="adata" value="random-randint.h5ad"/> | |
| 160 </conditional> | |
| 161 <conditional name="method"> | |
| 162 <param name="method" value="pp.recipe_zheng17"/> | |
| 163 <param name="n_top_genes" value="1000"/> | |
| 164 <param name="log" value="True"/> | |
| 165 </conditional> | |
| 166 <param name="anndata_output_format" value="h5ad"/> | |
| 167 <assert_stdout> | |
| 168 <has_text_matching expression="sc.pp.recipe_zheng17"/> | |
| 169 <has_text_matching expression="n_top_genes=1000"/> | |
| 170 <has_text_matching expression="log=True"/> | |
| 171 </assert_stdout> | |
| 172 <output name="anndata_out_h5ad" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5" compare="sim_size"/> | |
| 173 </test> | |
| 174 <!-- pp.recipe_weinreb17 on paul15_subsample.h5ad with every parameter of that branch set explicitly. Asserts each keyword argument appears in stdout and compares the h5ad output with sim_size. --> <test> | |
| 175 <conditional name="input"> | |
| 176 <param name="format" value="h5ad" /> | |
| 177 <param name="adata" value="paul15_subsample.h5ad" /> | |
| 178 </conditional> | |
| 179 <conditional name="method"> | |
| 180 <param name="method" value="pp.recipe_weinreb17"/> | |
| 181 <param name="log" value="True"/> | |
| 182 <param name="mean_threshold" value="0.01"/> | |
| 183 <param name="cv_threshold" value="2.0"/> | |
| 184 <param name="n_pcs" value="50"/> | |
| 185 <param name="svd_solver" value="randomized"/> | |
| 186 <param name="random_state" value="0"/> | |
| 187 </conditional> | |
| 188 <param name="anndata_output_format" value="h5ad" /> | |
| 189 <assert_stdout> | |
| 190 <has_text_matching expression="sc.pp.recipe_weinreb17"/> | |
| 191 <has_text_matching expression="log=True"/> | |
| 192 <has_text_matching expression="mean_threshold=0.01"/> | |
| 193 <has_text_matching expression="cv_threshold=2.0"/> | |
| 194 <has_text_matching expression="n_pcs=50"/> | |
| 195 <has_text_matching expression="svd_solver='randomized'"/> | |
| 196 <has_text_matching expression="random_state=0"/> | |
| 197 </assert_stdout> | |
| 198 <output name="anndata_out_h5ad" file="pp.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"/> | |
| 199 </test> | |
| 200 <!-- pp.recipe_seurat with log=True. The input file, pp.recipe_zheng17.random-randint.h5ad, has the same name as the reference output of the recipe_zheng17 test. Asserts the call fragments in stdout and compares the h5ad output with sim_size. --> <test> | |
| 201 <conditional name="input"> | |
| 202 <param name="format" value="h5ad" /> | |
| 203 <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" /> | |
| 204 </conditional> | |
| 205 <conditional name="method"> | |
| 206 <param name="method" value="pp.recipe_seurat"/> | |
| 207 <param name="log" value="True"/> | |
| 208 </conditional> | |
| 209 <param name="anndata_output_format" value="h5ad"/> | |
| 210 <assert_stdout> | |
| 211 <has_text_matching expression="sc.pp.recipe_seurat"/> | |
| 212 <has_text_matching expression="log=True"/> | |
| 213 </assert_stdout> | |
| 214 <output name="anndata_out_h5ad" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5" compare="sim_size"/> | |
| 215 </test> | |
| 216 <!-- pp.log1p on krumsiek11.h5ad; this branch has no parameters. Asserts the call name in stdout and compares the h5ad output with sim_size. --> <test> | |
| 217 <conditional name="input"> | |
| 218 <param name="format" value="h5ad" /> | |
| 219 <param name="adata" value="krumsiek11.h5ad" /> | |
| 220 </conditional> | |
| 221 <conditional name="method"> | |
| 222 <param name="method" value="pp.log1p"/> | |
| 223 </conditional> | |
| 224 <param name="anndata_output_format" value="h5ad" /> | |
| 225 <assert_stdout> | |
| 226 <has_text_matching expression="sc.pp.log1p"/> | |
| 227 </assert_stdout> | |
| 228 <output name="anndata_out_h5ad" file="pp.log1p.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> | |
| 229 </test> | |
| 230 <!-- pp.scale on krumsiek11.h5ad with zero_center enabled and max_value left unset. Asserts the call name and zero_center=True in stdout and compares the h5ad output with sim_size. --> <test> | |
| 231 <conditional name="input"> | |
| 232 <param name="format" value="h5ad" /> | |
| 233 <param name="adata" value="krumsiek11.h5ad" /> | |
| 234 </conditional> | |
| 235 <conditional name="method"> | |
| 236 <param name="method" value="pp.scale"/> | |
| 237 <param name="zero_center" value="true"/> | |
| 238 </conditional> | |
| 239 <param name="anndata_output_format" value="h5ad" /> | |
| 240 <assert_stdout> | |
| 241 <has_text_matching expression="sc.pp.scale"/> | |
| 242 <has_text_matching expression="zero_center=True"/> | |
| 243 </assert_stdout> | |
| 244 <output name="anndata_out_h5ad" file="pp.scale.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> | |
| 245 </test> | |
| 246 <!-- pp.scale on krumsiek11.h5ad with zero_center enabled and max_value=10, exercising the optional clipping argument. Asserts max_value=10.0 appears in stdout and compares the h5ad output with sim_size. --> <test> | |
| 247 <conditional name="input"> | |
| 248 <param name="format" value="h5ad" /> | |
| 249 <param name="adata" value="krumsiek11.h5ad" /> | |
| 250 </conditional> | |
| 251 <conditional name="method"> | |
| 252 <param name="method" value="pp.scale"/> | |
| 253 <param name="zero_center" value="true"/> | |
| 254 <param name="max_value" value="10"/> | |
| 255 </conditional> | |
| 256 <param name="anndata_output_format" value="h5ad" /> | |
| 257 <assert_stdout> | |
| 258 <has_text_matching expression="sc.pp.scale"/> | |
| 259 <has_text_matching expression="zero_center=True"/> | |
| 260 <has_text_matching expression="max_value=10.0"/> | |
| 261 </assert_stdout> | |
| 262 <output name="anndata_out_h5ad" file="pp.scale_max_value.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> | |
| 263 </test> | |
| 264 <!-- pp.sqrt on krumsiek11.h5ad; this branch has no parameters. Asserts the call name in stdout and compares the h5ad output with sim_size. --> <test> | |
| 265 <conditional name="input"> | |
| 266 <param name="format" value="h5ad" /> | |
| 267 <param name="adata" value="krumsiek11.h5ad" /> | |
| 268 </conditional> | |
| 269 <conditional name="method"> | |
| 270 <param name="method" value="pp.sqrt"/> | |
| 271 </conditional> | |
| 272 <param name="anndata_output_format" value="h5ad" /> | |
| 273 <assert_stdout> | |
| 274 <has_text_matching expression="sc.pp.sqrt"/> | |
| 275 </assert_stdout> | |
| 276 <output name="anndata_out_h5ad" file="pp.sqrt.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> | |
| 277 </test> | |
| 278 <!-- pp.downsample_counts on random-randint.h5ad with target_counts=20000 and random_state=0. Unlike the other tests, the output is not compared to a reference file; it is only checked for the h5 keys X, obs and var. --> <test> | |
| 279 <conditional name="input"> | |
| 280 <param name="format" value="h5ad" /> | |
| 281 <param name="adata" value="random-randint.h5ad" /> | |
| 282 </conditional> | |
| 283 <conditional name="method"> | |
| 284 <param name="method" value="pp.downsample_counts"/> | |
| 285 <param name="target_counts" value="20000"/> | |
| 286 <param name="random_state" value="0"/> | |
| 287 </conditional> | |
| 288 <param name="anndata_output_format" value="h5ad" /> | |
| 289 <assert_stdout> | |
| 290 <has_text_matching expression="sc.pp.downsample_counts"/> | |
| 291 <has_text_matching expression="target_counts=20000"/> | |
| 292 <has_text_matching expression="random_state=0"/> | |
| 293 </assert_stdout> | |
| 294 <output name="anndata_out_h5ad" ftype="h5"> | |
| 295 <assert_contents> | |
| 296 <has_h5_keys keys="X, obs, var" /> | |
| 297 </assert_contents> | |
| 298 </output> | |
| 299 </test> | |
| 300 </tests> | |
| 301 <help><![CDATA[ | |
| 302 Normalize total counts per cell (`pp.normalize_per_cell`) | |
| 303 ========================================================= | |
| 304 | |
| 305 Normalize each cell by total counts over all genes, so that every cell has | |
| 306 the same total count after normalization. | |
| 307 | |
| 308 Similar functions are used, for example, by Seurat, Cell Ranger or SPRING. | |
| 309 | |
| 310 More details on the `scanpy documentation | |
| 311 <https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.normalize_per_cell.html>`__ | |
| 312 | |
| 313 | |
| 314 Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`) | |
| 315 ================================================================================================================== | |
| 316 | |
| 317 Expects non-logarithmized data. If using logarithmized data, pass `log=False`. | |
| 318 | |
| 319 The recipe runs the following steps: | |
| 320 | |
| 321 - only consider genes with more than 1 count | |
| 322 - normalize with total UMI count per cell | |
| 323 - select highly-variable genes | |
| 324 - subset the genes | |
| 325 - renormalize after filtering | |
| 326 - log transform (if needed) | |
| 327 - scale to unit variance and shift to zero mean | |
| 328 | |
| 329 More details on the `scanpy documentation | |
| 330 <https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_zheng17.html>`__ | |
| 331 | |
| 332 | |
| 333 Normalization and filtering as of Weinreb et al (2017) (`pp.recipe_weinreb17`) | |
| 334 ============================================================================== | |
| 335 | |
| 336 Expects non-logarithmized data. If using logarithmized data, pass `log=False`. | |
| 337 | |
| 338 More details on the `scanpy documentation | |
| 339 <https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_weinreb17.html>`__ | |
| 340 | |
| 341 | |
| 342 Normalization and filtering as of Satija et al (2015) (`pp.recipe_seurat`) | |
| 343 ========================================================================== | |
| 344 | |
| 345 This uses a particular preprocessing. | |
| 346 | |
| 347 Expects non-logarithmized data. If using logarithmized data, pass `log=False`. | |
| 348 | |
| 349 More details on the `scanpy documentation | |
| 350 <https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_seurat.html>`__ | |
| 351 | |
| 352 Logarithmize the data matrix (`pp.log1p`) | |
| 353 ========================================= | |
| 354 | |
| 355 More details on the `scanpy documentation | |
| 356 <https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.log1p.html>`__ | |
| 357 | |
| 358 Scale data to unit variance and zero mean (`pp.scale`) | |
| 359 ====================================================== | |
| 360 | |
| 361 More details on the `scanpy documentation | |
| 362 <https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.scale.html>`__ | |
| 363 | |
| 364 Computes the square root of the data matrix (`pp.sqrt`) | |
| 365 ======================================================= | |
| 366 | |
| 367 `X = sqrt(X)` | |
| 368 | |
| 369 Downsample counts (`pp.downsample_counts`) | |
| 370 ========================================== | |
| 371 | |
| 372 Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This | |
| 373 has been implemented by M. D. Luecken. | |
| 374 | |
| 375 More details on the `scanpy documentation | |
| 376 <https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.downsample_counts.html>`__ | |
| 377 | |
| 378 ]]></help> | |
| 379 <expand macro="citations"/> | |
| 380 </tool> |
