Mercurial > repos > iuc > scanpy_filter
comparison filter.xml @ 18:2a55e0dae43a draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 637a6ebb9ca7f745c83146151cb1655cc902afc6
| author | iuc |
|---|---|
| date | Thu, 09 Jan 2025 15:50:45 +0000 |
| parents | eb36554fd6f9 |
| children | f6c7f9802673 |
comparison
equal
deleted
inserted
replaced
| 17:8f662540bd6e | 18:2a55e0dae43a |
|---|---|
| 1 <tool id="scanpy_filter" name="Scanpy filter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | 1 <tool id="scanpy_filter" name="Scanpy filter" version="@TOOL_VERSION@+galaxy3" profile="@PROFILE@"> |
| 2 <description>mark and subsample</description> | 2 <description>mark and subsample</description> |
| 3 <macros> | 3 <macros> |
| 4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
| 5 </macros> | 5 </macros> |
| 6 <expand macro="bio_tools"/> | 6 <expand macro="bio_tools"/> |
| 44 min_cells=$method.filter.min_cells, | 44 min_cells=$method.filter.min_cells, |
| 45 #else if $method.filter.filter == 'max_cells' | 45 #else if $method.filter.filter == 'max_cells' |
| 46 max_cells=$method.filter.max_cells, | 46 max_cells=$method.filter.max_cells, |
| 47 #end if | 47 #end if |
| 48 copy=False) | 48 copy=False) |
| 49 | |
| 50 @CMD_ANNDATA_WRITE_OUTPUTS@ | |
| 51 | |
| 52 #else if $method.method == 'filter_any' | |
| 53 #if $method.filter.filter == 'key' | |
| 54 #if $method.var_obs == 'var' | |
| 55 filtered = adata.var['$method.filter.key'] | |
| 56 #else if $method.var_obs == 'obs' | |
| 57 filtered = adata.obs['$method.filter.key'] | |
| 58 #end if | |
| 59 | |
| 60 #if $method.filter.filter_key.type == 'number' | |
| 61 #if $method.filter.filter_key.filter == 'equal' | |
| 62 filtered = filtered == $method.filter.filter_key.value | |
| 63 #else if $method.filter.filter_key.filter == 'not_equal' | |
| 64 filtered = filtered != $method.filter.filter_key.value | |
| 65 #else if $method.filter.filter_key.filter == 'less' | |
| 66 filtered = filtered < $method.filter.filter_key.value | |
| 67 #else if $method.filter.filter_key.filter == 'less_or_equal' | |
| 68 filtered = filtered <= $method.filter.filter_key.value | |
| 69 #else if $method.filter.filter_key.filter == 'greater' | |
| 70 filtered = filtered > $method.filter.filter_key.value | |
| 71 #else if $method.filter.filter_key.filter == 'greater_or_equal' | |
| 72 filtered = filtered >= $method.filter.filter_key.value | |
| 73 #end if | |
| 74 #else if $method.filter.filter_key.type == 'text' | |
| 75 #if $method.filter.filter_key.filter == 'equal' | |
| 76 filtered = filtered == '$method.filter.filter_key.value' | |
| 77 #else | |
| 78 filtered = filtered != '$method.filter.filter_key.value' | |
| 79 #end if | |
| 80 #else if $method.filter.filter_key.type == 'boolean' | |
| 81 filtered = filtered == $method.filter.filter_key.value | |
| 82 #end if | |
| 83 | |
| 84 #else if $method.filter.filter == 'index' | |
| 85 #if str($method.filter.index.format) == 'file' | |
| 86 with open('$method.filter.index.file', 'r') as filter_f: | |
| 87 filters = [str(x.strip()) for x in filter_f.readlines()] | |
| 88 filtered = filters | |
| 89 #else | |
| 90 #set $filters = [str(x.strip()) for x in $method.filter.index.text.split(',')] | |
| 91 filtered = $filters | |
| 92 #end if | |
| 93 #end if | |
| 94 print(filtered) | |
| 95 | |
| 96 #if $method.var_obs == 'var' | |
| 97 adata = adata[:,filtered] | |
| 98 #else if $method.var_obs == 'obs' | |
| 99 adata = adata[filtered, :] | |
| 100 #end if | |
| 49 | 101 |
| 50 @CMD_ANNDATA_WRITE_OUTPUTS@ | 102 @CMD_ANNDATA_WRITE_OUTPUTS@ |
| 51 | 103 |
| 52 #else if $method.method == 'tl.filter_rank_genes_groups' | 104 #else if $method.method == 'tl.filter_rank_genes_groups' |
| 53 sc.tl.filter_rank_genes_groups( | 105 sc.tl.filter_rank_genes_groups( |
| 216 <expand macro="inputs_anndata"/> | 268 <expand macro="inputs_anndata"/> |
| 217 <conditional name="method"> | 269 <conditional name="method"> |
| 218 <param argument="method" type="select" label="Method used for filtering"> | 270 <param argument="method" type="select" label="Method used for filtering"> |
| 219 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using 'pp.filter_cells'</option> | 271 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using 'pp.filter_cells'</option> |
| 220 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using 'pp.filter_genes'</option> | 272 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using 'pp.filter_genes'</option> |
| 273 <option value="filter_any">Filter on any column of observations or variables</option> | |
| 221 <option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories, using 'tl.filter_rank_genes_groups'</option> | 274 <option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories, using 'tl.filter_rank_genes_groups'</option> |
| 222 <option value="pp.highly_variable_genes">Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'</option> | 275 <option value="pp.highly_variable_genes">Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'</option> |
| 223 <option value="pp.subsample">Subsample to a fraction of the number of observations, using 'pp.subsample'</option> | 276 <option value="pp.subsample">Subsample to a fraction of the number of observations, using 'pp.subsample'</option> |
| 224 <option value="pp.downsample_counts">Downsample counts from count matrix, using 'pp.downsample_counts'</option> | 277 <option value="pp.downsample_counts">Downsample counts from count matrix, using 'pp.downsample_counts'</option> |
| 225 <option value="filter_marker">Filter markers from count matrix and marker list</option> | 278 <option value="filter_marker">Filter markers from count matrix and marker list</option> |
| 232 <option value="max_counts">Maximum number of counts</option> | 285 <option value="max_counts">Maximum number of counts</option> |
| 233 <option value="min_genes">Minimum number of genes expressed</option> | 286 <option value="min_genes">Minimum number of genes expressed</option> |
| 234 <option value="max_genes">Maximum number of genes expressed</option> | 287 <option value="max_genes">Maximum number of genes expressed</option> |
| 235 </param> | 288 </param> |
| 236 <when value="min_counts"> | 289 <when value="min_counts"> |
| 237 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a cell to pass filtering"/> | 290 <param argument="min_counts" type="integer" min="0" value="0" label="Minimum number of counts required for a cell to pass filtering"/> |
| 238 </when> | 291 </when> |
| 239 <when value="max_counts"> | 292 <when value="max_counts"> |
| 240 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a cell to pass filtering"/> | 293 <param argument="max_counts" type="integer" min="0" value="100000000" label="Maximum number of counts required for a cell to pass filtering"/> |
| 241 </when> | 294 </when> |
| 242 <when value="min_genes"> | 295 <when value="min_genes"> |
| 243 <param argument="min_genes" type="integer" min="0" value="" label="Minimum number of genes expressed required for a cell to pass filtering"/> | 296 <param argument="min_genes" type="integer" min="0" value="0" label="Minimum number of genes expressed required for a cell to pass filtering"/> |
| 244 </when> | 297 </when> |
| 245 <when value="max_genes"> | 298 <when value="max_genes"> |
| 246 <param argument="max_genes" type="integer" min="0" value="" label="Maximum number of genes expressed required for a cell to pass filtering"/> | 299 <param argument="max_genes" type="integer" min="0" value="100000000" label="Maximum number of genes expressed required for a cell to pass filtering"/> |
| 247 </when> | 300 </when> |
| 248 </conditional> | 301 </conditional> |
| 249 </when> | 302 </when> |
| 250 <when value="pp.filter_genes"> | 303 <when value="pp.filter_genes"> |
| 251 <conditional name="filter"> | 304 <conditional name="filter"> |
| 254 <option value="max_counts">Maximum number of counts</option> | 307 <option value="max_counts">Maximum number of counts</option> |
| 255 <option value="min_cells">Minimum number of cells expressed</option> | 308 <option value="min_cells">Minimum number of cells expressed</option> |
| 256 <option value="max_cells">Maximum number of cells expressed</option> | 309 <option value="max_cells">Maximum number of cells expressed</option> |
| 257 </param> | 310 </param> |
| 258 <when value="min_counts"> | 311 <when value="min_counts"> |
| 259 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering"/> | 312 <param argument="min_counts" type="integer" min="0" value="" optional="true" label="Minimum number of counts required for a gene to pass filtering"/> |
| 260 </when> | 313 </when> |
| 261 <when value="max_counts"> | 314 <when value="max_counts"> |
| 262 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering"/> | 315 <param argument="max_counts" type="integer" min="0" value="" optional="true" label="Maximum number of counts required for a gene to pass filtering"/> |
| 263 </when> | 316 </when> |
| 264 <when value="min_cells"> | 317 <when value="min_cells"> |
| 265 <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering"/> | 318 <param argument="min_cells" type="integer" min="0" value="" optional="true" label="Minimum number of cells expressed required for a gene to pass filtering"/> |
| 266 </when> | 319 </when> |
| 267 <when value="max_cells"> | 320 <when value="max_cells"> |
| 268 <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering"/> | 321 <param argument="max_cells" type="integer" min="0" value="" optional="true" label="Maximum number of cells expressed required for a gene to pass filtering"/> |
| 322 </when> | |
| 323 </conditional> | |
| 324 </when> | |
| 325 <when value="filter_any"> | |
| 326 <param name="var_obs" type="select" label="What to filter?"> | |
| 327 <option value="var">Variables (var)</option> | |
| 328 <option value="obs">Observations (obs)</option> | |
| 329 </param> | |
| 330 <conditional name="filter"> | |
| 331 <param name="filter" type="select" label="Type of filtering?"> | |
| 332 <option value="key">By key (column) values</option> | |
| 333 <option value="index">By index (row)</option> | |
| 334 </param> | |
| 335 <when value="key"> | |
| 336 <param name="key" type="text" value="n_genes" label="Key to filter"/> | |
| 337 <conditional name="filter_key"> | |
| 338 <param name="type" type="select" label="Type of value to filter"> | |
| 339 <option value="number">Number</option> | |
| 340 <option value="text">Text</option> | |
| 341 <option value="boolean">Boolean</option> | |
| 342 </param> | |
| 343 <when value="number"> | |
| 344 <param name="filter" type="select" label="Filter"> | |
| 345 <option value="equal">equal to</option> | |
| 346 <option value="not_equal">not equal to</option> | |
| 347 <option value="less">less than</option> | |
| 348 <option value="less_or_equal">less than or equal to</option> | |
| 349 <option value="greater">greater than</option> | |
| 350 <option value="greater_or_equal">greater than or equal to</option> | |
| 351 </param> | |
| 352 <param name="value" type="float" value="2500" label="Value"/> | |
| 353 </when> | |
| 354 <when value="text"> | |
| 355 <param name="filter" type="select" label="Filter"> | |
| 356 <option value="equal">equal to</option> | |
| 357 <option value="not_equal">not equal to</option></param> | |
| 358 <param name="value" type="text" value="2500" label="Value"/> | |
| 359 </when> | |
| 360 <when value="boolean"> | |
| 361 <param name="value" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Value to keep"/> | |
| 362 </when> | |
| 363 </conditional> | |
| 364 </when> | |
| 365 <when value="index"> | |
| 366 <conditional name="index"> | |
| 367 <param name="format" type="select" label="Format for the filter by index"> | |
| 368 <option value="file">File</option> | |
| 369 <option value="text" selected="true">Text</option> | |
| 370 </param> | |
| 371 <when value="text"> | |
| 372 <param name="text" type="text" value="" label="List of index to keep" help="Indexes separated by a comma"/> | |
| 373 </when> | |
| 374 <when value="file"> | |
| 375 <param name="file" type="data" format="txt" label="File with the list of index to keep" help="One index per line"/> | |
| 376 </when> | |
| 377 </conditional> | |
| 269 </when> | 378 </when> |
| 270 </conditional> | 379 </conditional> |
| 271 </when> | 380 </when> |
| 272 <when value="tl.filter_rank_genes_groups"> | 381 <when value="tl.filter_rank_genes_groups"> |
| 273 <param argument="key" type="text" optional="true" label="Key in adata.uns where the rank_genes_groups output is stored"> | 382 <param argument="key" type="text" optional="true" label="Key in adata.uns where the rank_genes_groups output is stored"> |
| 476 <has_h5_keys keys="obs/cell_type"/> | 585 <has_h5_keys keys="obs/cell_type"/> |
| 477 </assert_contents> | 586 </assert_contents> |
| 478 </output> | 587 </output> |
| 479 </test> | 588 </test> |
| 480 | 589 |
| 481 <!-- test 4 --> | 590 <test expect_num_outputs="1"> |
| 591 <!-- test 4 --> | |
| 592 <param name="adata" value="krumsiek11.h5ad"/> | |
| 593 <conditional name="method"> | |
| 594 <param name="method" value="filter_any"/> | |
| 595 <param name="var_obs" value="var"/> | |
| 596 <conditional name="filter"> | |
| 597 <param name="filter" value="index"/> | |
| 598 <conditional name="index"> | |
| 599 <param name="format" value="text"/> | |
| 600 <param name="text" value="Gata2,EKLF"/> | |
| 601 </conditional> | |
| 602 </conditional> | |
| 603 </conditional> | |
| 604 <assert_stdout> | |
| 605 <has_text_matching expression="640 × 2"/> | |
| 606 </assert_stdout> | |
| 607 <output name="anndata_out" ftype="h5ad"> | |
| 608 <assert_contents> | |
| 609 <has_h5_keys keys="obs/cell_type"/> | |
| 610 <has_h5_keys keys="uns/highlights"/> | |
| 611 <has_h5_keys keys="uns/iroot"/> | |
| 612 </assert_contents> | |
| 613 </output> | |
| 614 </test> | |
| 615 <test expect_num_outputs="1"> | |
| 616 <!-- test 5 --> | |
| 617 <param name="adata" value="krumsiek11.h5ad"/> | |
| 618 <conditional name="method"> | |
| 619 <param name="method" value="filter_any"/> | |
| 620 <param name="var_obs" value="obs"/> | |
| 621 <conditional name="filter"> | |
| 622 <param name="filter" value="key"/> | |
| 623 <param name="key" value="cell_type"/> | |
| 624 <conditional name="filter_key"> | |
| 625 <param name="type" value="text"/> | |
| 626 <param name="filter" value="equal"/> | |
| 627 <param name="value" value="progenitor"/> | |
| 628 </conditional> | |
| 629 </conditional> | |
| 630 </conditional> | |
| 631 <assert_stdout> | |
| 632 <has_text_matching expression="320 × 11"/> | |
| 633 </assert_stdout> | |
| 634 <output name="anndata_out" ftype="h5ad"> | |
| 635 <assert_contents> | |
| 636 <has_h5_keys keys="obs/cell_type"/> | |
| 637 <has_h5_keys keys="uns/highlights"/> | |
| 638 <has_h5_keys keys="uns/iroot"/> | |
| 639 </assert_contents> | |
| 640 </output> | |
| 641 </test> | |
| 642 | |
| 643 <!-- test 6 --> | |
| 482 <!-- Fails to write to anndata after tl.filter_rank_genes_groups | 644 <!-- Fails to write to anndata after tl.filter_rank_genes_groups |
| 483 Issue has been reported here: https://github.com/scverse/anndata/issues/726 | 645 Issue has been reported here: https://github.com/scverse/anndata/issues/726 |
| 484 The current fix is: del adata.uns['rank_genes_groups_filtered'] --> | 646 The current fix is: del adata.uns['rank_genes_groups_filtered'] --> |
| 485 <!-- The issue is fixed in the script here --> | 647 <!-- The issue is fixed in the script here --> |
| 486 <test expect_num_outputs="2"> | 648 <test expect_num_outputs="2"> |
| 509 <has_h5_keys keys="uns/rank_genes_groups_filtered"/> | 671 <has_h5_keys keys="uns/rank_genes_groups_filtered"/> |
| 510 </assert_contents> | 672 </assert_contents> |
| 511 </output> | 673 </output> |
| 512 </test> | 674 </test> |
| 513 | 675 |
| 514 <!-- test 5 --> | 676 <!-- test 7 --> |
| 515 <test expect_num_outputs="2"> | 677 <test expect_num_outputs="2"> |
| 516 <param name="adata" value="blobs.h5ad"/> | 678 <param name="adata" value="blobs.h5ad"/> |
| 517 <conditional name="method"> | 679 <conditional name="method"> |
| 518 <param name="method" value="pp.highly_variable_genes"/> | 680 <param name="method" value="pp.highly_variable_genes"/> |
| 519 </conditional> | 681 </conditional> |
| 537 <has_h5_keys keys="uns/hvg"/> | 699 <has_h5_keys keys="uns/hvg"/> |
| 538 </assert_contents> | 700 </assert_contents> |
| 539 </output> | 701 </output> |
| 540 </test> | 702 </test> |
| 541 | 703 |
| 542 <!-- test 6 --> | 704 <!-- test 8 --> |
| 543 <test expect_num_outputs="2"> | 705 <test expect_num_outputs="2"> |
| 544 <param name="adata" value="krumsiek11.h5ad"/> | 706 <param name="adata" value="krumsiek11.h5ad"/> |
| 545 <conditional name="method"> | 707 <conditional name="method"> |
| 546 <param name="method" value="pp.highly_variable_genes"/> | 708 <param name="method" value="pp.highly_variable_genes"/> |
| 547 <conditional name="flavor"> | 709 <conditional name="flavor"> |
| 568 <has_h5_keys keys="uns/hvg"/> | 730 <has_h5_keys keys="uns/hvg"/> |
| 569 </assert_contents> | 731 </assert_contents> |
| 570 </output> | 732 </output> |
| 571 </test> | 733 </test> |
| 572 | 734 |
| 573 <!-- test 7 --> | 735 <!-- test 9 --> |
| 574 <test expect_num_outputs="2"> | 736 <test expect_num_outputs="2"> |
| 575 <param name="adata" value="krumsiek11.h5ad"/> | 737 <param name="adata" value="krumsiek11.h5ad"/> |
| 576 <conditional name="method"> | 738 <conditional name="method"> |
| 577 <param name="method" value="pp.subsample"/> | 739 <param name="method" value="pp.subsample"/> |
| 578 <conditional name="type"> | 740 <conditional name="type"> |
| 598 <has_h5_keys keys="obs/cell_type"/> | 760 <has_h5_keys keys="obs/cell_type"/> |
| 599 </assert_contents> | 761 </assert_contents> |
| 600 </output> | 762 </output> |
| 601 </test> | 763 </test> |
| 602 | 764 |
| 603 <!-- test 8 --> | 765 <!-- test 10 --> |
| 604 <test expect_num_outputs="2"> | 766 <test expect_num_outputs="2"> |
| 605 <param name="adata" value="krumsiek11.h5ad"/> | 767 <param name="adata" value="krumsiek11.h5ad"/> |
| 606 <conditional name="method"> | 768 <conditional name="method"> |
| 607 <param name="method" value="pp.subsample"/> | 769 <param name="method" value="pp.subsample"/> |
| 608 <conditional name="type"> | 770 <conditional name="type"> |
| 609 <param name="type" value="n_obs"/> | 771 <param name="type" value="n_obs"/> |
| 628 <has_h5_keys keys="obs/cell_type"/> | 790 <has_h5_keys keys="obs/cell_type"/> |
| 629 </assert_contents> | 791 </assert_contents> |
| 630 </output> | 792 </output> |
| 631 </test> | 793 </test> |
| 632 | 794 |
| 633 <!-- test 9 --> | 795 <!-- test 11 --> |
| 634 <test expect_num_outputs="2"> | 796 <test expect_num_outputs="2"> |
| 635 <param name="adata" value="random-randint.h5ad"/> | 797 <param name="adata" value="random-randint.h5ad"/> |
| 636 <conditional name="method"> | 798 <conditional name="method"> |
| 637 <param name="method" value="pp.downsample_counts"/> | 799 <param name="method" value="pp.downsample_counts"/> |
| 638 <param name="total_counts" value="20000"/> | 800 <param name="total_counts" value="20000"/> |
| 655 <has_h5_keys keys="var/index"/> | 817 <has_h5_keys keys="var/index"/> |
| 656 </assert_contents> | 818 </assert_contents> |
| 657 </output> | 819 </output> |
| 658 </test> | 820 </test> |
| 659 | 821 |
| 660 <!-- test 10 --> | 822 <!-- test 12 --> |
| 661 <test expect_num_outputs="2"> | 823 <test expect_num_outputs="2"> |
| 662 <param name="adata" value="random-randint.h5ad"/> | 824 <param name="adata" value="random-randint.h5ad"/> |
| 663 <conditional name="method"> | 825 <conditional name="method"> |
| 664 <param name="method" value="pp.downsample_counts"/> | 826 <param name="method" value="pp.downsample_counts"/> |
| 665 <param name="counts_per_cell" value="20000"/> | 827 <param name="counts_per_cell" value="20000"/> |
| 684 <has_h5_keys keys="var/index"/> | 846 <has_h5_keys keys="var/index"/> |
| 685 </assert_contents> | 847 </assert_contents> |
| 686 </output> | 848 </output> |
| 687 </test> | 849 </test> |
| 688 | 850 |
| 689 <!-- test 10 --> | 851 <!-- test 13 --> |
| 690 <test expect_num_outputs="2"> | 852 <test expect_num_outputs="2"> |
| 691 <param name="adata" value="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1.h5ad"/> | 853 <param name="adata" value="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1.h5ad"/> |
| 692 <conditional name="method"> | 854 <conditional name="method"> |
| 693 <param name="method" value="filter_marker"/> | 855 <param name="method" value="filter_marker"/> |
| 694 <param name="markerfile" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_1.tsv"/> | 856 <param name="markerfile" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_1.tsv"/> |
| 714 <has_text text="GZMB"/> | 876 <has_text text="GZMB"/> |
| 715 </assert_contents> | 877 </assert_contents> |
| 716 </output> | 878 </output> |
| 717 </test> | 879 </test> |
| 718 | 880 |
| 719 <!-- test 11 --> | 881 <!-- test 14 --> |
| 720 <test expect_num_outputs="2"> | 882 <test expect_num_outputs="2"> |
| 721 <param name="adata" value="krumsiek11.h5ad"/> | 883 <param name="adata" value="krumsiek11.h5ad"/> |
| 722 <conditional name="method"> | 884 <conditional name="method"> |
| 723 <param name="method" value="pp.scrublet"/> | 885 <param name="method" value="pp.scrublet"/> |
| 724 <param name="n_prin_comps" value="5"/> | 886 <param name="n_prin_comps" value="5"/> |
