Mercurial > repos > iuc > scanpy_filter
comparison filter.xml @ 0:7edb8980267d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 92f85afaed0097d1879317a9f513093fce5481d6
| author | iuc |
|---|---|
| date | Mon, 04 Mar 2019 10:10:38 -0500 |
| parents | |
| children | deeb0203d693 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:7edb8980267d |
|---|---|
| 1 <tool id="scanpy_filter" name="Filter with scanpy" version="@galaxy_version@"> | |
| 2 <description></description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements"/> | |
| 7 <expand macro="version_command"/> | |
| 8 <command detect_errors="exit_code"><![CDATA[ | |
| 9 @CMD@ | |
| 10 ]]></command> | |
| 11 <configfiles> | |
| 12 <configfile name="script_file"><![CDATA[ | |
| 13 @CMD_imports@ | |
| 14 @CMD_read_inputs@ | |
| 15 | |
| 16 #if $method.method == 'pp.filter_cells' | |
| 17 res = sc.pp.filter_cells( | |
| 18 #if $modify_anndata.modify_anndata == 'true' | |
| 19 adata, | |
| 20 #else | |
| 21 adata.X, | |
| 22 #end if | |
| 23 #if $method.filter.filter == 'min_counts' | |
| 24 min_counts=$method.filter.min_counts, | |
| 25 #elif $method.filter.filter == 'max_counts' | |
| 26 max_counts=$method.filter.max_counts, | |
| 27 #elif $method.filter.filter == 'min_genes' | |
| 28 min_genes=$method.filter.min_genes, | |
| 29 #elif $method.filter.filter == 'max_genes' | |
| 30 max_genes=$method.filter.max_genes, | |
| 31 #end if | |
| 32 copy=False) | |
| 33 | |
| 34 #if $modify_anndata.modify_anndata == 'true' | |
| 35 df = adata.obs | |
| 36 #else | |
| 37 df = pd.DataFrame(data=dict(cell_subset=res[0], number_per_cell=res[1])) | |
| 38 #end if | |
| 39 | |
| 40 #if $method.filter.filter == 'min_counts' or $method.filter.filter == 'max_counts' | |
| 41 df.to_csv('$counts_per_cell', sep='\t') | |
| 42 #elif $method.filter.filter == 'min_genes' or $method.filter.filter == 'max_genes' | |
| 43 df.to_csv('$genes_per_cell', sep='\t') | |
| 44 #end if | |
| 45 | |
| 46 #elif $method.method == 'pp.filter_genes' | |
| 47 res = sc.pp.filter_genes( | |
| 48 #if $modify_anndata.modify_anndata == 'true' | |
| 49 adata, | |
| 50 #else | |
| 51 adata.X, | |
| 52 #end if | |
| 53 #if $method.filter.filter == 'min_counts' | |
| 54 min_counts=$method.filter.min_counts, | |
| 55 #elif $method.filter.filter == 'max_counts' | |
| 56 max_counts=$method.filter.max_counts, | |
| 57 #elif $method.filter.filter == 'min_cells' | |
| 58 min_cells=$method.filter.min_cells, | |
| 59 #elif $method.filter.filter == 'max_cells' | |
| 60 max_cells=$method.filter.max_cells, | |
| 61 #end if | |
| 62 copy=False) | |
| 63 | |
| 64 #if $modify_anndata.modify_anndata == 'true' | |
| 65 df = adata.var | |
| 66 #else | |
| 67 df = pd.DataFrame(data=dict(gene_subset=res[0], number_per_gene=res[1])) | |
| 68 #end if | |
| 69 | |
| 70 #if $method.filter.filter == 'min_counts' or $method.filter.filter == 'max_counts' | |
| 71 df.to_csv('$counts_per_gene', sep='\t') | |
| 72 #elif $method.filter.filter == 'min_cells' or $method.filter.filter == 'max_cells' | |
| 73 df.to_csv('$cells_per_gene', sep='\t') | |
| 74 #end if | |
| 75 | |
| 76 #elif $method.method == 'pp.filter_genes_dispersion' | |
| ## Highly variable gene extraction. max_disp is an optional field, so it is tested by comparing its string form with the empty string instead of by truthiness, which would drop an explicit 0. | |
| 77 res = sc.pp.filter_genes_dispersion( | |
| 78 #if $modify_anndata.modify_anndata == 'true' | |
| 79 adata, | |
| 80 #else | |
| 81 adata.X, | |
| 82 #end if | |
| 83 flavor='$method.flavor.flavor', | |
| 84 #if $method.flavor.flavor=='seurat' | |
| 85 min_mean=$method.flavor.min_mean, | |
| 86 max_mean=$method.flavor.max_mean, | |
| 87 min_disp=$method.flavor.min_disp, | |
| 88 #if str($method.flavor.max_disp) != '' | |
| 89 max_disp=$method.flavor.max_disp, | |
| 90 #end if | |
| 91 #else | |
| 92 n_top_genes=$method.flavor.n_top_genes, | |
| 93 #end if | |
| 94 n_bins=$method.n_bins, | |
| 95 log=$method.log, | |
| 96 copy=False) | |
| 97 | |
| 98 #if $modify_anndata.modify_anndata == 'true' | |
| 99 adata.var.to_csv('$per_gene', sep='\t') | |
| 100 #else | |
| 101 pd.DataFrame(res).to_csv('$per_gene', sep='\t') | |
| 102 #end if | |
| 103 | |
| 104 #elif $method.method == 'pp.subsample' | |
| ## Subsampling is applied to the AnnData object itself (copy=False); exactly one of fraction or n_obs is passed, depending on the selected type. | |
| 105 sc.pp.subsample( | |
| 106 data=adata, | |
| 107 #if $method.type.type == 'fraction' | |
| 108 fraction=$method.type.fraction, | |
| 109 #elif $method.type.type == 'n_obs' | |
| 110 n_obs=$method.type.n_obs, | |
| 111 #end if | |
| 112 random_state=$method.random_state, | |
| 113 copy=False) | |
| 114 | |
| 115 #end if | |
| 116 | |
| 117 @CMD_anndata_write_modify_outputs@ | |
| 118 ]]></configfile> | |
| 119 </configfiles> | |
| 120 <inputs> | |
| 121 <expand macro="inputs_anndata"/> | |
| 122 <conditional name="method"> | |
| 123 <param argument="method" type="select" label="Method used for filtering"> | |
| 124 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using `pp.filter_cells`</option> | |
| 125 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using `pp.filter_genes`</option> | |
| 126 <option value="pp.filter_genes_dispersion">Extract highly variable genes, using `pp.filter_genes_dispersion`</option> | |
| 127 <!-- Disabled, the script has no branch for this value: <option value="pp.highly_variable_genes">, using `pp.highly_variable_genes`</option> --> | |
| 128 <option value="pp.subsample">Subsample to a fraction of the number of observations, using `pp.subsample`</option> | |
| 129 <!-- Disabled, the script has no branch for this value: <option value="queries.gene_coordinates">, using `queries.gene_coordinates`</option> --> | |
| 130 <!-- Disabled, the script has no branch for this value: <option value="queries.mitochondrial_genes">, using `queries.mitochondrial_genes`</option> --> | |
| 131 </param> | |
| 132 <when value="pp.filter_cells"> | |
| 133 <conditional name="filter"> | |
| 134 <param argument="filter" type="select" label="Filter"> | |
| 135 <option value="min_counts">Minimum number of counts</option> | |
| 136 <option value="max_counts">Maximum number of counts</option> | |
| 137 <option value="min_genes">Minimum number of genes expressed</option> | |
| 138 <option value="max_genes">Maximum number of genes expressed</option> | |
| 139 </param> | |
| 140 <when value="min_counts"> | |
| 141 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a cell to pass filtering" help=""/> | |
| 142 </when> | |
| 143 <when value="max_counts"> | |
| 144 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a cell to pass filtering" help=""/> | |
| 145 </when> | |
| 146 <when value="min_genes"> | |
| 147 <param argument="min_genes" type="integer" min="0" value="" label="Minimum number of genes expressed required for a cell to pass filtering" help=""/> | |
| 148 </when> | |
| 149 <when value="max_genes"> | |
| 150 <param argument="max_genes" type="integer" min="0" value="" label="Maximum number of genes expressed required for a cell to pass filtering" help=""/> | |
| 151 </when> | |
| 152 </conditional> | |
| 153 </when> | |
| 154 <when value="pp.filter_genes"> | |
| 155 <conditional name="filter"> | |
| 156 <param argument="filter" type="select" label="Filter"> | |
| 157 <option value="min_counts">Minimum number of counts</option> | |
| 158 <option value="max_counts">Maximum number of counts</option> | |
| 159 <option value="min_cells">Minimum number of cells expressed</option> | |
| 160 <option value="max_cells">Maximum number of cells expressed</option> | |
| 161 </param> | |
| 162 <when value="min_counts"> | |
| 163 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering" help=""/> | |
| 164 </when> | |
| 165 <when value="max_counts"> | |
| 166 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering" help=""/> | |
| 167 </when> | |
| 168 <when value="min_cells"> | |
| 169 <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering" help=""/> | |
| 170 </when> | |
| 171 <when value="max_cells"> | |
| 172 <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering" help=""/> | |
| 173 </when> | |
| 174 </conditional> | |
| 175 </when> | |
| 176 <when value="pp.filter_genes_dispersion"> | |
| 177 <!-- Flavor-specific cutoffs; the script reads them as $method.flavor.* --> <conditional name="flavor"> | |
| 178 <param argument="flavor" type="select" label="Flavor for computing normalized dispersion" help=""> | |
| 179 <option value="seurat">seurat: expects non-logarithmized data</option> | |
| 180 <option value="cell_ranger">cell_ranger: usually called for logarithmized data</option> | |
| 181 </param> | |
| 182 <when value="seurat"> | |
| 183 <param argument="min_mean" type="float" value="0.0125" label="Minimal mean cutoff" help=""/> | |
| 184 <param argument="max_mean" type="float" value="3" label="Maximal mean cutoff" help=""/> | |
| 185 <param argument="min_disp" type="float" value="0.5" label="Minimal normalized dispersion cutoff" help=""/> | |
| 186 <param argument="max_disp" type="float" value="" optional="true" label="Maximal normalized dispersion cutoff" help=""/> | |
| 187 </when> | |
| 188 <when value="cell_ranger"> | |
| 189 <param argument="n_top_genes" type="integer" value="" label="Number of highly-variable genes to keep" help=""/> | |
| 190 </when> | |
| 191 </conditional> | |
| 192 <!-- At least one bin is needed for the binning to be meaningful --> <param argument="n_bins" type="integer" min="1" value="20" label="Number of bins for binning the mean gene expression" help="Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1"/> | |
| 193 <expand macro="param_log"/> | |
| 194 </when> | |
| 195 <when value="pp.subsample"> | |
| 196 <!-- Exactly one of fraction / n_obs is forwarded to sc.pp.subsample by the script --> <conditional name="type"> | |
| 197 <param name="type" type="select" label="Type of subsampling"> | |
| 198 <option value="fraction">By fraction</option> | |
| 199 <option value="n_obs">By number of observation</option> | |
| 200 </param> | |
| 201 <when value="fraction"> | |
| 202 <!-- scanpy only accepts a fraction within [0, 1], so bound it in the form --> <param argument="fraction" type="float" min="0" max="1" value="" label="Subsample to this `fraction` of the number of observations" help=""/> | |
| 203 </when> | |
| 204 <when value="n_obs"> | |
| 205 <param argument="n_obs" type="integer" min="0" value="" label="Subsample to this number of observations" help=""/> | |
| 206 </when> | |
| 207 </conditional> | |
| 208 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling" help=""/> | |
| 209 </when> | |
| 210 </conditional> | |
| 211 <expand macro="anndata_modify_output_input"/> | |
| 212 </inputs> | |
| 213 <outputs> | |
| 214 <expand macro="anndata_modify_outputs"/> | |
| 215 <!-- for pp.filter_cells --> | |
| 216 <data name="counts_per_cell" format="tabular" label="${tool.name} on ${on_string}: Counts per cells after filtering"> | |
| 217 <filter>method['method'] == 'pp.filter_cells' and (method['filter']['filter'] == 'min_counts' or method['filter']['filter'] == 'max_counts')</filter> | |
| 218 </data> | |
| 219 <data name="genes_per_cell" format="tabular" label="${tool.name} on ${on_string}: Number of genes per cell after filtering"> | |
| 220 <filter>method['method'] == 'pp.filter_cells' and (method['filter']['filter'] == 'min_genes' or method['filter']['filter'] == 'max_genes')</filter> | |
| 221 </data> | |
| 222 <!-- for pp.filter_genes --> | |
| 223 <data name="counts_per_gene" format="tabular" label="${tool.name} on ${on_string}: Counts per genes after filtering"> | |
| 224 <filter>method['method'] == 'pp.filter_genes' and (method['filter']['filter'] == 'min_counts' or method['filter']['filter'] == 'max_counts')</filter> | |
| 225 </data> | |
| 226 <data name="cells_per_gene" format="tabular" label="${tool.name} on ${on_string}: Number of cells per genes after filtering"> | |
| 227 <filter>method['method'] == 'pp.filter_genes' and (method['filter']['filter'] == 'min_cells' or method['filter']['filter'] == 'max_cells')</filter> | |
| 228 </data> | |
| 229 <!-- for pp.filter_genes_dispersion --> | |
| 230 <data name="per_gene" format="tabular" label="${tool.name} on ${on_string}: Means, dispersions and normalized dispersions per gene"> | |
| 231 <filter>method['method'] == 'pp.filter_genes_dispersion'</filter> | |
| 232 </data> | |
| 233 </outputs> | |
| 234 <tests> | |
| 235 <test expect_num_outputs="2"> | |
| 236 <conditional name="input"> | |
| 237 <param name="format" value="h5ad" /> | |
| 238 <param name="adata" value="krumsiek11.h5ad" /> | |
| 239 </conditional> | |
| 240 <conditional name="method"> | |
| 241 <param name="method" value="pp.filter_cells"/> | |
| 242 <conditional name="filter"> | |
| 243 <param name="filter" value="min_counts"/> | |
| 244 <param name="min_counts" value="3"/> | |
| 245 </conditional> | |
| 246 </conditional> | |
| 247 <conditional name="modify_anndata"> | |
| 248 <param name="modify_anndata" value="true"/> | |
| 249 <param name="anndata_output_format" value="h5ad" /> | |
| 250 </conditional> | |
| 251 <assert_stdout> | |
| 252 <has_text_matching expression="sc.pp.filter_cells"/> | |
| 253 <has_text_matching expression="min_counts=3"/> | |
| 254 </assert_stdout> | |
| 255 <output name="anndata_out_h5ad" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5" compare="sim_size"/> | |
| 256 <output name="counts_per_cell"> | |
| 257 <assert_contents> | |
| 258 <has_text_matching expression="cell_type\tn_counts" /> | |
| 259 <has_text_matching expression="46\tprogenitor\t3.028" /> | |
| 260 <has_text_matching expression="85\tEry\t3.7001" /> | |
| 261 <has_text_matching expression="150\tMk\t4.095" /> | |
| 262 <has_n_columns n="3" /> | |
| 263 </assert_contents> | |
| 264 </output> | |
| 265 </test> | |
| 266 <test expect_num_outputs="2"> | |
| 267 <conditional name="input"> | |
| 268 <param name="format" value="loom" /> | |
| 269 <param name="adata" value="krumsiek11.loom" /> | |
| 270 <param name="sparse" value="True"/> | |
| 271 <param name="cleanup" value="False"/> | |
| 272 <param name="x_name" value="spliced"/> | |
| 273 <param name="obs_names" value="CellID" /> | |
| 274 <param name="var_names" value="Gene"/> | |
| 275 </conditional> | |
| 276 <conditional name="method"> | |
| 277 <param name="method" value="pp.filter_cells"/> | |
| 278 <conditional name="filter"> | |
| 279 <param name="filter" value="min_counts"/> | |
| 280 <param name="min_counts" value="3"/> | |
| 281 </conditional> | |
| 282 </conditional> | |
| 283 <conditional name="modify_anndata"> | |
| 284 <param name="modify_anndata" value="true"/> | |
| 285 <param name="anndata_output_format" value="loom" /> | |
| 286 </conditional> | |
| 287 <assert_stdout> | |
| 288 <has_text_matching expression="sc.pp.filter_cells"/> | |
| 289 <has_text_matching expression="min_counts=3"/> | |
| 290 </assert_stdout> | |
| 291 <output name="anndata_out_loom" file="pp.filter_cells.krumsiek11-min_counts.loom" ftype="loom" compare="sim_size"/> | |
| 292 <output name="counts_per_cell"> | |
| 293 <assert_contents> | |
| 294 <has_text_matching expression="cell_type\tn_counts" /> | |
| 295 <has_text_matching expression="46\tprogenitor\t3.028" /> | |
| 296 <has_text_matching expression="85\tEry\t3.7001" /> | |
| 297 <has_text_matching expression="97\tMo\t3.925" /> | |
| 298 <has_text_matching expression="150\tMk\t4.095" /> | |
| 299 <has_n_columns n="3" /> | |
| 300 </assert_contents> | |
| 301 </output> | |
| 302 </test> | |
| 303 <test expect_num_outputs="1"> | |
| 304 <conditional name="input"> | |
| 305 <param name="format" value="h5ad" /> | |
| 306 <param name="adata" value="krumsiek11.h5ad"/> | |
| 307 </conditional> | |
| 308 <conditional name="method"> | |
| 309 <param name="method" value="pp.filter_cells"/> | |
| 310 <conditional name="filter"> | |
| 311 <param name="filter" value="max_genes"/> | |
| 312 <param name="max_genes" value="100"/> | |
| 313 </conditional> | |
| 314 </conditional> | |
| 315 <conditional name="modify_anndata"> | |
| 316 <param name="modify_anndata" value="false"/> | |
| 317 </conditional> | |
| 318 <assert_stdout> | |
| 319 <has_text_matching expression="sc.pp.filter_cells"/> | |
| 320 <has_text_matching expression="adata.X"/> | |
| 321 <has_text_matching expression="max_genes=100"/> | |
| 322 </assert_stdout> | |
| 323 <output name="genes_per_cell" file="pp.filter_cells.number_per_cell.krumsiek11-max_genes.tabular"/> | |
| 324 </test> | |
| 325 <test expect_num_outputs="2"> | |
| 326 <conditional name="input"> | |
| 327 <param name="format" value="h5ad" /> | |
| 328 <param name="adata" value="krumsiek11.h5ad" /> | |
| 329 </conditional> | |
| 330 <conditional name="method"> | |
| 331 <param name="method" value="pp.filter_genes"/> | |
| 332 <conditional name="filter"> | |
| 333 <param name="filter" value="min_counts"/> | |
| 334 <param name="min_counts" value="3"/> | |
| 335 </conditional> | |
| 336 </conditional> | |
| 337 <conditional name="modify_anndata"> | |
| 338 <param name="modify_anndata" value="true"/> | |
| 339 <param name="anndata_output_format" value="h5ad" /> | |
| 340 </conditional> | |
| 341 <assert_stdout> | |
| 342 <has_text_matching expression="sc.pp.filter_genes"/> | |
| 343 <has_text_matching expression="min_counts=3"/> | |
| 344 </assert_stdout> | |
| 345 <output name="anndata_out_h5ad" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5" compare="sim_size"/> | |
| 346 <output name="counts_per_gene" file="pp.filter_genes.number_per_gene.krumsiek11-min_counts.tabular"/> | |
| 347 </test> | |
| 348 <test expect_num_outputs="1"> | |
| 349 <conditional name="input"> | |
| 350 <param name="format" value="h5ad" /> | |
| 351 <param name="adata" value="pbmc68k_reduced.h5ad"/> | |
| 352 </conditional> | |
| 353 <conditional name="method"> | |
| 354 <param name="method" value="pp.filter_genes"/> | |
| 355 <conditional name="filter"> | |
| 356 <param name="filter" value="max_cells"/> | |
| 357 <param name="max_cells" value="500"/> | |
| 358 </conditional> | |
| 359 </conditional> | |
| 360 <conditional name="modify_anndata"> | |
| 361 <param name="modify_anndata" value="false"/> | |
| 362 </conditional> | |
| 363 <assert_stdout> | |
| 364 <has_text_matching expression="sc.pp.filter_genes"/> | |
| 365 <has_text_matching expression="adata.X"/> | |
| 366 <has_text_matching expression="max_cells=500"/> | |
| 367 </assert_stdout> | |
| 368 <output name="cells_per_gene" file="pp.filter_genes.number_per_gene.pbmc68k_reduced-max_cells.tabular"/> | |
| 369 </test> | |
| 370 <test expect_num_outputs="2"> | |
| 371 <conditional name="input"> | |
| 372 <param name="format" value="h5ad" /> | |
| 373 <param name="adata" value="krumsiek11.h5ad" /> | |
| 374 </conditional> | |
| 375 <conditional name="method"> | |
| 376 <param name="method" value="pp.filter_genes_dispersion"/> | |
| 377 <conditional name="flavor"> | |
| 378 <param name="flavor" value="seurat"/> | |
| 379 <param name="min_mean" value="0.0125"/> | |
| 380 <param name="max_mean" value="3"/> | |
| 381 <param name="min_disp" value="0.5"/> | |
| 382 </conditional> | |
| 383 <param name="n_bins" value="20" /> | |
| 384 <param name="log" value="true"/> | |
| 385 </conditional> | |
| 386 <conditional name="modify_anndata"> | |
| 387 <param name="modify_anndata" value="true"/> | |
| 388 <param name="anndata_output_format" value="h5ad" /> | |
| 389 </conditional> | |
| 390 <assert_stdout> | |
| 391 <has_text_matching expression="sc.pp.filter_genes_dispersion"/> | |
| 392 <has_text_matching expression="flavor='seurat'"/> | |
| 393 <has_text_matching expression="min_mean=0.0125"/> | |
| 394 <has_text_matching expression="max_mean=3.0"/> | |
| 395 <has_text_matching expression="min_disp=0.5"/> | |
| 396 <has_text_matching expression="n_bins=20"/> | |
| 397 <has_text_matching expression="log=True"/> | |
| 398 </assert_stdout> | |
| 399 <output name="anndata_out_h5ad" file="pp.filter_genes_dispersion.krumsiek11-seurat.h5ad" ftype="h5" compare="sim_size"/> | |
| 400 <output name="per_gene" file="pp.filter_genes_dispersion.per_gene.krumsiek11-seurat.tabular"/> | |
| 401 </test> | |
| 402 <!-- cell_ranger flavor on the raw matrix (modify_anndata=false): only the per-gene table is expected; stdout must show the full log=True keyword --> <test expect_num_outputs="1"> | |
| 403 <conditional name="input"> | |
| 404 <param name="format" value="h5ad" /> | |
| 405 <param name="adata" value="krumsiek11.h5ad" /> | |
| 406 </conditional> | |
| 407 <conditional name="method"> | |
| 408 <param name="method" value="pp.filter_genes_dispersion"/> | |
| 409 <conditional name="flavor"> | |
| 410 <param name="flavor" value="cell_ranger"/> | |
| 411 <param name="n_top_genes" value="2"/> | |
| 412 </conditional> | |
| 413 <param name="n_bins" value="20"/> | |
| 414 <param name="log" value="true"/> | |
| 415 </conditional> | |
| 416 <conditional name="modify_anndata"> | |
| 417 <param name="modify_anndata" value="false"/> | |
| 418 </conditional> | |
| 419 <assert_stdout> | |
| 420 <has_text_matching expression="sc.pp.filter_genes_dispersion"/> | |
| 421 <has_text_matching expression="flavor='cell_ranger'"/> | |
| 422 <has_text_matching expression="n_top_genes=2"/> | |
| 423 <has_text_matching expression="n_bins=20"/> | |
| 424 <has_text_matching expression="log=True"/> | |
| 425 </assert_stdout> | |
| 426 <output name="per_gene" file="pp.filter_genes_dispersion.per_gene.krumsiek11-cell_ranger.tabular"/> | |
| 427 </test> | |
| 428 <test expect_num_outputs="1"> | |
| 429 <conditional name="input"> | |
| 430 <param name="format" value="h5ad" /> | |
| 431 <param name="adata" value="krumsiek11.h5ad" /> | |
| 432 </conditional> | |
| 433 <conditional name="method"> | |
| 434 <param name="method" value="pp.subsample"/> | |
| 435 <conditional name="type"> | |
| 436 <param name="type" value="fraction" /> | |
| 437 <param name="fraction" value="0.5"/> | |
| 438 </conditional> | |
| 439 <param name="random_state" value="0"/> | |
| 440 </conditional> | |
| 441 <conditional name="modify_anndata"> | |
| 442 <param name="modify_anndata" value="true"/> | |
| 443 <param name="anndata_output_format" value="h5ad" /> | |
| 444 </conditional> | |
| 445 <assert_stdout> | |
| 446 <has_text_matching expression="sc.pp.subsample"/> | |
| 447 <has_text_matching expression="fraction=0.5"/> | |
| 448 <has_text_matching expression="random_state=0"/> | |
| 449 </assert_stdout> | |
| 450 <output name="anndata_out_h5ad" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5" compare="sim_size"/> | |
| 451 </test> | |
| 452 <test expect_num_outputs="1"> | |
| 453 <conditional name="input"> | |
| 454 <param name="format" value="h5ad" /> | |
| 455 <param name="adata" value="krumsiek11.h5ad" /> | |
| 456 </conditional> | |
| 457 <conditional name="method"> | |
| 458 <param name="method" value="pp.subsample"/> | |
| 459 <conditional name="type"> | |
| 460 <param name="type" value="n_obs" /> | |
| 461 <param name="n_obs" value="10"/> | |
| 462 </conditional> | |
| 463 <param name="random_state" value="0"/> | |
| 464 </conditional> | |
| 465 <conditional name="modify_anndata"> | |
| 466 <param name="modify_anndata" value="true"/> | |
| 467 <param name="anndata_output_format" value="h5ad" /> | |
| 468 </conditional> | |
| 469 <assert_stdout> | |
| 470 <has_text_matching expression="sc.pp.subsample"/> | |
| 471 <has_text_matching expression="n_obs=10"/> | |
| 472 <has_text_matching expression="random_state=0"/> | |
| 473 </assert_stdout> | |
| 474 <output name="anndata_out_h5ad" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5" compare="sim_size"/> | |
| 475 </test> | |
| 476 </tests> | |
| 477 <help><![CDATA[ | |
| 478 | |
| 479 Filter cell outliers based on counts and numbers of genes expressed (`pp.filter_cells`) | |
| 480 ======================================================================================== | |
| 481 | |
| 482 For instance, only keep cells with at least `min_counts` counts or | |
| 483 `min_genes` genes expressed. This is to filter measurement outliers, i.e., | |
| 484 "unreliable" observations. | |
| 485 | |
| 486 Only provide one of the optional parameters `min_counts`, `min_genes`, | |
| 487 `max_counts`, `max_genes` per call. | |
| 488 | |
| 489 More details on the `scanpy documentation | |
| 490 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_cells.html#scanpy.api.pp.filter_cells>`__ | |
| 491 | |
| 492 Return | |
| 493 ------ | |
| 494 | |
| 495 number_per_cell : Number per cell (either `n_counts` or `n_genes` per cell) | |
| 496 | |
| 497 | |
| 498 Filter genes based on number of cells or counts (`pp.filter_genes`) | |
| 499 =================================================================== | |
| 500 | |
| 501 Keep genes that have at least `min_counts` counts or are expressed in at | |
| 502 least `min_cells` cells or have at most `max_counts` counts or are expressed | |
| 503 in at most `max_cells` cells. | |
| 504 | |
| 505 Only provide one of the optional parameters `min_counts`, `min_cells`, | |
| 506 `max_counts`, `max_cells` per call. | |
| 507 | |
| 508 More details on the `scanpy documentation | |
| 509 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_genes.html#scanpy.api.pp.filter_genes>`__ | |
| 510 | |
| 511 Return | |
| 512 ------ | |
| 513 | |
| 514 number_per_gene : Number per gene (either `n_counts` or `n_cells` per gene) | |
| 515 | |
| 516 | |
| 517 Extract highly variable genes (`pp.filter_genes_dispersion`) | |
| 518 ============================================================ | |
| 519 | |
| 520 If trying out parameters, pass the data matrix instead of AnnData. | |
| 521 | |
| 522 Depending on `flavor`, this reproduces the R-implementations of Seurat and Cell Ranger. | |
| 523 | |
| 524 The normalized dispersion is obtained by scaling with the mean and standard | |
| 525 deviation of the dispersions for genes falling into a given bin for mean | |
| 526 expression of genes. This means that for each bin of mean expression, highly | |
| 527 variable genes are selected. | |
| 528 | |
| 529 Use `flavor='cell_ranger'` with care and in the same way as in `pp.recipe_zheng17`. | |
| 530 | |
| 531 More details on the `scanpy documentation | |
| 532 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_genes_dispersion.html#scanpy.api.pp.filter_genes_dispersion>`__ | |
| 533 | |
| 534 Returns | |
| 535 ------- | |
| 536 - The annotated matrix filtered, with the annotations | |
| 537 - A table with the means, dispersions, and normalized dispersions per gene, logarithmized when `log` is `True`. | |
| 538 | |
| 539 | |
| 540 Subsample to a fraction of the number of observations (`pp.subsample`) | |
| 541 ====================================================================== | |
| 542 | |
| 543 More details on the `scanpy documentation | |
| 544 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.subsample.html#scanpy.api.pp.subsample>`__ | |
| 545 | |
| 546 | |
| 547 ]]></help> | |
| 548 <expand macro="citations"/> | |
| 549 </tool> |
