comparison filter.xml @ 18:2a55e0dae43a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 637a6ebb9ca7f745c83146151cb1655cc902afc6
author iuc
date Thu, 09 Jan 2025 15:50:45 +0000
parents eb36554fd6f9
children f6c7f9802673
comparison
equal deleted inserted replaced
17:8f662540bd6e 18:2a55e0dae43a
1 <tool id="scanpy_filter" name="Scanpy filter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> 1 <tool id="scanpy_filter" name="Scanpy filter" version="@TOOL_VERSION@+galaxy3" profile="@PROFILE@">
2 <description>mark and subsample</description> 2 <description>mark and subsample</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="bio_tools"/> 6 <expand macro="bio_tools"/>
44 min_cells=$method.filter.min_cells, 44 min_cells=$method.filter.min_cells,
45 #else if $method.filter.filter == 'max_cells' 45 #else if $method.filter.filter == 'max_cells'
46 max_cells=$method.filter.max_cells, 46 max_cells=$method.filter.max_cells,
47 #end if 47 #end if
48 copy=False) 48 copy=False)
49
50 @CMD_ANNDATA_WRITE_OUTPUTS@
51
52 #else if $method.method == 'filter_any'
53 #if $method.filter.filter == 'key'
54 #if $method.var_obs == 'var'
55 filtered = adata.var['$method.filter.key']
56 #else if $method.var_obs == 'obs'
57 filtered = adata.obs['$method.filter.key']
58 #end if
59
60 #if $method.filter.filter_key.type == 'number'
61 #if $method.filter.filter_key.filter == 'equal'
62 filtered = filtered == $method.filter.filter_key.value
63 #else if $method.filter.filter_key.filter == 'not_equal'
64 filtered = filtered != $method.filter.filter_key.value
65 #else if $method.filter.filter_key.filter == 'less'
66 filtered = filtered < $method.filter.filter_key.value
67 #else if $method.filter.filter_key.filter == 'less_or_equal'
68 filtered = filtered <= $method.filter.filter_key.value
69 #else if $method.filter.filter_key.filter == 'greater'
70 filtered = filtered > $method.filter.filter_key.value
71 #else if $method.filter.filter_key.filter == 'greater_or_equal'
72 filtered = filtered >= $method.filter.filter_key.value
73 #end if
74 #else if $method.filter.filter_key.type == 'text'
75 #if $method.filter.filter_key.filter == 'equal'
76 filtered = filtered == '$method.filter.filter_key.value'
77 #else
78 filtered = filtered != '$method.filter.filter_key.value'
79 #end if
80 #else if $method.filter.filter_key.type == 'boolean'
81 filtered = filtered == $method.filter.filter_key.value
82 #end if
83
84 #else if $method.filter.filter == 'index'
85 #if str($method.filter.index.format) == 'file'
86 with open('$method.filter.index.file', 'r') as filter_f:
87 filters = [str(x.strip()) for x in filter_f.readlines()]
88 filtered = filters
89 #else
90 #set $filters = [str(x.strip()) for x in $method.filter.index.text.split(',')]
91 filtered = $filters
92 #end if
93 #end if
94 print(filtered)
95
96 #if $method.var_obs == 'var'
97 adata = adata[:,filtered]
98 #else if $method.var_obs == 'obs'
99 adata = adata[filtered, :]
100 #end if
49 101
50 @CMD_ANNDATA_WRITE_OUTPUTS@ 102 @CMD_ANNDATA_WRITE_OUTPUTS@
51 103
52 #else if $method.method == 'tl.filter_rank_genes_groups' 104 #else if $method.method == 'tl.filter_rank_genes_groups'
53 sc.tl.filter_rank_genes_groups( 105 sc.tl.filter_rank_genes_groups(
216 <expand macro="inputs_anndata"/> 268 <expand macro="inputs_anndata"/>
217 <conditional name="method"> 269 <conditional name="method">
218 <param argument="method" type="select" label="Method used for filtering"> 270 <param argument="method" type="select" label="Method used for filtering">
219 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using 'pp.filter_cells'</option> 271 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using 'pp.filter_cells'</option>
220 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using 'pp.filter_genes'</option> 272 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using 'pp.filter_genes'</option>
273 <option value="filter_any">Filter on any column of observations or variables</option>
221 <option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories, using 'tl.filter_rank_genes_groups'</option> 274 <option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories, using 'tl.filter_rank_genes_groups'</option>
222 <option value="pp.highly_variable_genes">Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'</option> 275 <option value="pp.highly_variable_genes">Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'</option>
223 <option value="pp.subsample">Subsample to a fraction of the number of observations, using 'pp.subsample'</option> 276 <option value="pp.subsample">Subsample to a fraction of the number of observations, using 'pp.subsample'</option>
224 <option value="pp.downsample_counts">Downsample counts from count matrix, using 'pp.downsample_counts'</option> 277 <option value="pp.downsample_counts">Downsample counts from count matrix, using 'pp.downsample_counts'</option>
225 <option value="filter_marker">Filter markers from count matrix and marker list</option> 278 <option value="filter_marker">Filter markers from count matrix and marker list</option>
232 <option value="max_counts">Maximum number of counts</option> 285 <option value="max_counts">Maximum number of counts</option>
233 <option value="min_genes">Minimum number of genes expressed</option> 286 <option value="min_genes">Minimum number of genes expressed</option>
234 <option value="max_genes">Maximum number of genes expressed</option> 287 <option value="max_genes">Maximum number of genes expressed</option>
235 </param> 288 </param>
236 <when value="min_counts"> 289 <when value="min_counts">
237 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a cell to pass filtering"/> 290 <param argument="min_counts" type="integer" min="0" value="0" label="Minimum number of counts required for a cell to pass filtering"/>
238 </when> 291 </when>
239 <when value="max_counts"> 292 <when value="max_counts">
240 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a cell to pass filtering"/> 293 <param argument="max_counts" type="integer" min="0" value="100000000" label="Maximum number of counts required for a cell to pass filtering"/>
241 </when> 294 </when>
242 <when value="min_genes"> 295 <when value="min_genes">
243 <param argument="min_genes" type="integer" min="0" value="" label="Minimum number of genes expressed required for a cell to pass filtering"/> 296 <param argument="min_genes" type="integer" min="0" value="0" label="Minimum number of genes expressed required for a cell to pass filtering"/>
244 </when> 297 </when>
245 <when value="max_genes"> 298 <when value="max_genes">
246 <param argument="max_genes" type="integer" min="0" value="" label="Maximum number of genes expressed required for a cell to pass filtering"/> 299 <param argument="max_genes" type="integer" min="0" value="100000000" label="Maximum number of genes expressed required for a cell to pass filtering"/>
247 </when> 300 </when>
248 </conditional> 301 </conditional>
249 </when> 302 </when>
250 <when value="pp.filter_genes"> 303 <when value="pp.filter_genes">
251 <conditional name="filter"> 304 <conditional name="filter">
254 <option value="max_counts">Maximum number of counts</option> 307 <option value="max_counts">Maximum number of counts</option>
255 <option value="min_cells">Minimum number of cells expressed</option> 308 <option value="min_cells">Minimum number of cells expressed</option>
256 <option value="max_cells">Maximum number of cells expressed</option> 309 <option value="max_cells">Maximum number of cells expressed</option>
257 </param> 310 </param>
258 <when value="min_counts"> 311 <when value="min_counts">
259 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering"/> 312 <param argument="min_counts" type="integer" min="0" value="" optional="true" label="Minimum number of counts required for a gene to pass filtering"/>
260 </when> 313 </when>
261 <when value="max_counts"> 314 <when value="max_counts">
262 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering"/> 315 <param argument="max_counts" type="integer" min="0" value="" optional="true" label="Maximum number of counts required for a gene to pass filtering"/>
263 </when> 316 </when>
264 <when value="min_cells"> 317 <when value="min_cells">
265 <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering"/> 318 <param argument="min_cells" type="integer" min="0" value="" optional="true" label="Minimum number of cells expressed required for a gene to pass filtering"/>
266 </when> 319 </when>
267 <when value="max_cells"> 320 <when value="max_cells">
268 <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering"/> 321 <param argument="max_cells" type="integer" min="0" value="" optional="true" label="Maximum number of cells expressed required for a gene to pass filtering"/>
322 </when>
323 </conditional>
324 </when>
325 <when value="filter_any">
326 <param name="var_obs" type="select" label="What to filter?">
327 <option value="var">Variables (var)</option>
328 <option value="obs">Observations (obs)</option>
329 </param>
330 <conditional name="filter">
331 <param name="filter" type="select" label="Type of filtering?">
332 <option value="key">By key (column) values</option>
333 <option value="index">By index (row)</option>
334 </param>
335 <when value="key">
336 <param name="key" type="text" value="n_genes" label="Key to filter"/>
337 <conditional name="filter_key">
338 <param name="type" type="select" label="Type of value to filter">
339 <option value="number">Number</option>
340 <option value="text">Text</option>
341 <option value="boolean">Boolean</option>
342 </param>
343 <when value="number">
344 <param name="filter" type="select" label="Filter">
345 <option value="equal">equal to</option>
346 <option value="not_equal">not equal to</option>
347 <option value="less">less than</option>
348 <option value="less_or_equal">less than or equal to</option>
349 <option value="greater">greater than</option>
350 <option value="greater_or_equal">greater than or equal to</option>
351 </param>
352 <param name="value" type="float" value="2500" label="Value"/>
353 </when>
354 <when value="text">
355 <param name="filter" type="select" label="Filter">
356 <option value="equal">equal to</option>
357 <option value="not_equal">not equal to</option></param>
358 <param name="value" type="text" value="2500" label="Value"/>
359 </when>
360 <when value="boolean">
361 <param name="value" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Value to keep"/>
362 </when>
363 </conditional>
364 </when>
365 <when value="index">
366 <conditional name="index">
367 <param name="format" type="select" label="Format for the filter by index">
368 <option value="file">File</option>
369 <option value="text" selected="true">Text</option>
370 </param>
371 <when value="text">
372 <param name="text" type="text" value="" label="List of index to keep" help="Indexes separated by a comma"/>
373 </when>
374 <when value="file">
375 <param name="file" type="data" format="txt" label="File with the list of index to keep" help="One index per line"/>
376 </when>
377 </conditional>
269 </when> 378 </when>
270 </conditional> 379 </conditional>
271 </when> 380 </when>
272 <when value="tl.filter_rank_genes_groups"> 381 <when value="tl.filter_rank_genes_groups">
273 <param argument="key" type="text" optional="true" label="Key in adata.uns where the rank_genes_groups output is stored"> 382 <param argument="key" type="text" optional="true" label="Key in adata.uns where the rank_genes_groups output is stored">
476 <has_h5_keys keys="obs/cell_type"/> 585 <has_h5_keys keys="obs/cell_type"/>
477 </assert_contents> 586 </assert_contents>
478 </output> 587 </output>
479 </test> 588 </test>
480 589
481 <!-- test 4 --> 590 <test expect_num_outputs="1">
591 <!-- test 4 -->
592 <param name="adata" value="krumsiek11.h5ad"/>
593 <conditional name="method">
594 <param name="method" value="filter_any"/>
595 <param name="var_obs" value="var"/>
596 <conditional name="filter">
597 <param name="filter" value="index"/>
598 <conditional name="index">
599 <param name="format" value="text"/>
600 <param name="text" value="Gata2,EKLF"/>
601 </conditional>
602 </conditional>
603 </conditional>
604 <assert_stdout>
605 <has_text_matching expression="640 × 2"/>
606 </assert_stdout>
607 <output name="anndata_out" ftype="h5ad">
608 <assert_contents>
609 <has_h5_keys keys="obs/cell_type"/>
610 <has_h5_keys keys="uns/highlights"/>
611 <has_h5_keys keys="uns/iroot"/>
612 </assert_contents>
613 </output>
614 </test>
615 <test expect_num_outputs="1">
616 <!-- test 5 -->
617 <param name="adata" value="krumsiek11.h5ad"/>
618 <conditional name="method">
619 <param name="method" value="filter_any"/>
620 <param name="var_obs" value="obs"/>
621 <conditional name="filter">
622 <param name="filter" value="key"/>
623 <param name="key" value="cell_type"/>
624 <conditional name="filter_key">
625 <param name="type" value="text"/>
626 <param name="filter" value="equal"/>
627 <param name="value" value="progenitor"/>
628 </conditional>
629 </conditional>
630 </conditional>
631 <assert_stdout>
632 <has_text_matching expression="320 × 11"/>
633 </assert_stdout>
634 <output name="anndata_out" ftype="h5ad">
635 <assert_contents>
636 <has_h5_keys keys="obs/cell_type"/>
637 <has_h5_keys keys="uns/highlights"/>
638 <has_h5_keys keys="uns/iroot"/>
639 </assert_contents>
640 </output>
641 </test>
642
643 <!-- test 6 -->
482 <!-- Fails to write to anndata after tl.filter_rank_genes_groups 644 <!-- Fails to write to anndata after tl.filter_rank_genes_groups
483 Issue has been reported here: https://github.com/scverse/anndata/issues/726 645 Issue has been reported here: https://github.com/scverse/anndata/issues/726
484 The current fix is: del adata.uns['rank_genes_groups_filtered'] --> 646 The current fix is: del adata.uns['rank_genes_groups_filtered'] -->
485 <!-- The issue is fixed in the script here --> 647 <!-- The issue is fixed in the script here -->
486 <test expect_num_outputs="2"> 648 <test expect_num_outputs="2">
509 <has_h5_keys keys="uns/rank_genes_groups_filtered"/> 671 <has_h5_keys keys="uns/rank_genes_groups_filtered"/>
510 </assert_contents> 672 </assert_contents>
511 </output> 673 </output>
512 </test> 674 </test>
513 675
514 <!-- test 5 --> 676 <!-- test 7 -->
515 <test expect_num_outputs="2"> 677 <test expect_num_outputs="2">
516 <param name="adata" value="blobs.h5ad"/> 678 <param name="adata" value="blobs.h5ad"/>
517 <conditional name="method"> 679 <conditional name="method">
518 <param name="method" value="pp.highly_variable_genes"/> 680 <param name="method" value="pp.highly_variable_genes"/>
519 </conditional> 681 </conditional>
537 <has_h5_keys keys="uns/hvg"/> 699 <has_h5_keys keys="uns/hvg"/>
538 </assert_contents> 700 </assert_contents>
539 </output> 701 </output>
540 </test> 702 </test>
541 703
542 <!-- test 6 --> 704 <!-- test 8 -->
543 <test expect_num_outputs="2"> 705 <test expect_num_outputs="2">
544 <param name="adata" value="krumsiek11.h5ad"/> 706 <param name="adata" value="krumsiek11.h5ad"/>
545 <conditional name="method"> 707 <conditional name="method">
546 <param name="method" value="pp.highly_variable_genes"/> 708 <param name="method" value="pp.highly_variable_genes"/>
547 <conditional name="flavor"> 709 <conditional name="flavor">
568 <has_h5_keys keys="uns/hvg"/> 730 <has_h5_keys keys="uns/hvg"/>
569 </assert_contents> 731 </assert_contents>
570 </output> 732 </output>
571 </test> 733 </test>
572 734
573 <!-- test 7 --> 735 <!-- test 9 -->
574 <test expect_num_outputs="2"> 736 <test expect_num_outputs="2">
575 <param name="adata" value="krumsiek11.h5ad"/> 737 <param name="adata" value="krumsiek11.h5ad"/>
576 <conditional name="method"> 738 <conditional name="method">
577 <param name="method" value="pp.subsample"/> 739 <param name="method" value="pp.subsample"/>
578 <conditional name="type"> 740 <conditional name="type">
598 <has_h5_keys keys="obs/cell_type"/> 760 <has_h5_keys keys="obs/cell_type"/>
599 </assert_contents> 761 </assert_contents>
600 </output> 762 </output>
601 </test> 763 </test>
602 764
603 <!-- test 8 --> 765 <!-- test 10 -->
604 <test expect_num_outputs="2"> 766 <test expect_num_outputs="2">
605 <param name="adata" value="krumsiek11.h5ad"/> 767 <param name="adata" value="krumsiek11.h5ad"/>
606 <conditional name="method"> 768 <conditional name="method">
607 <param name="method" value="pp.subsample"/> 769 <param name="method" value="pp.subsample"/>
608 <conditional name="type"> 770 <conditional name="type">
609 <param name="type" value="n_obs"/> 771 <param name="type" value="n_obs"/>
628 <has_h5_keys keys="obs/cell_type"/> 790 <has_h5_keys keys="obs/cell_type"/>
629 </assert_contents> 791 </assert_contents>
630 </output> 792 </output>
631 </test> 793 </test>
632 794
633 <!-- test 9 --> 795 <!-- test 11 -->
634 <test expect_num_outputs="2"> 796 <test expect_num_outputs="2">
635 <param name="adata" value="random-randint.h5ad"/> 797 <param name="adata" value="random-randint.h5ad"/>
636 <conditional name="method"> 798 <conditional name="method">
637 <param name="method" value="pp.downsample_counts"/> 799 <param name="method" value="pp.downsample_counts"/>
638 <param name="total_counts" value="20000"/> 800 <param name="total_counts" value="20000"/>
655 <has_h5_keys keys="var/index"/> 817 <has_h5_keys keys="var/index"/>
656 </assert_contents> 818 </assert_contents>
657 </output> 819 </output>
658 </test> 820 </test>
659 821
660 <!-- test 10 --> 822 <!-- test 12 -->
661 <test expect_num_outputs="2"> 823 <test expect_num_outputs="2">
662 <param name="adata" value="random-randint.h5ad"/> 824 <param name="adata" value="random-randint.h5ad"/>
663 <conditional name="method"> 825 <conditional name="method">
664 <param name="method" value="pp.downsample_counts"/> 826 <param name="method" value="pp.downsample_counts"/>
665 <param name="counts_per_cell" value="20000"/> 827 <param name="counts_per_cell" value="20000"/>
684 <has_h5_keys keys="var/index"/> 846 <has_h5_keys keys="var/index"/>
685 </assert_contents> 847 </assert_contents>
686 </output> 848 </output>
687 </test> 849 </test>
688 850
689 <!-- test 10 --> 851 <!-- test 13 -->
690 <test expect_num_outputs="2"> 852 <test expect_num_outputs="2">
691 <param name="adata" value="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1.h5ad"/> 853 <param name="adata" value="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1.h5ad"/>
692 <conditional name="method"> 854 <conditional name="method">
693 <param name="method" value="filter_marker"/> 855 <param name="method" value="filter_marker"/>
694 <param name="markerfile" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_1.tsv"/> 856 <param name="markerfile" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_1.tsv"/>
714 <has_text text="GZMB"/> 876 <has_text text="GZMB"/>
715 </assert_contents> 877 </assert_contents>
716 </output> 878 </output>
717 </test> 879 </test>
718 880
719 <!-- test 11 --> 881 <!-- test 14 -->
720 <test expect_num_outputs="2"> 882 <test expect_num_outputs="2">
721 <param name="adata" value="krumsiek11.h5ad"/> 883 <param name="adata" value="krumsiek11.h5ad"/>
722 <conditional name="method"> 884 <conditional name="method">
723 <param name="method" value="pp.scrublet"/> 885 <param name="method" value="pp.scrublet"/>
724 <param name="n_prin_comps" value="5"/> 886 <param name="n_prin_comps" value="5"/>