comparison filter.xml @ 12:d600e0947468 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit c21958f44b81d740191999fb6015d5ae69538ee0
author iuc
date Wed, 31 Jul 2024 18:06:35 +0000
parents 97b82bb0bb7e
children e299752da98e
comparison
equal deleted inserted replaced
11:c7ccb6ba94fb 12:d600e0947468
1 <tool id="scanpy_filter" name="Filter" version="@galaxy_version@" profile="@profile@"> 1 <tool id="scanpy_filter" name="Filter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">
2 <description>with scanpy</description> 2 <description>with scanpy</description>
3 <expand macro="bio_tools"/>
4 <macros> 3 <macros>
5 <import>macros.xml</import> 4 <import>macros.xml</import>
6 </macros> 5 </macros>
6 <expand macro="bio_tools"/>
7 <expand macro="requirements"/> 7 <expand macro="requirements"/>
8 <expand macro="version_command"/> 8 <expand macro="version_command"/>
9 <command detect_errors="exit_code"><![CDATA[ 9 <command detect_errors="exit_code"><![CDATA[
10 @CMD@ 10 @CMD@
11 ]]></command> 11 ]]></command>
43 copy=False) 43 copy=False)
44 44
45 #else if $method.method == 'tl.filter_rank_genes_groups' 45 #else if $method.method == 'tl.filter_rank_genes_groups'
46 sc.tl.filter_rank_genes_groups( 46 sc.tl.filter_rank_genes_groups(
47 adata, 47 adata,
48 #if str($method.key) != '' 48 #if $method.key
49 key='$method.key', 49 key='$method.key',
50 #end if 50 #end if
51 #if str($method.groupby) != '' 51 #if $method.groupby
52 groupby='$method.groupby', 52 groupby='$method.groupby',
53 #end if 53 #end if
54 use_raw=$method.use_raw, 54 use_raw=$method.use_raw,
55 log=$method.log,
56 key_added='$method.key_added', 55 key_added='$method.key_added',
57 min_in_group_fraction=$method.min_in_group_fraction, 56 min_in_group_fraction=$method.min_in_group_fraction,
58 max_out_group_fraction=$method.max_out_group_fraction, 57 max_out_group_fraction=$method.max_out_group_fraction,
59 min_fold_change=$method.min_fold_change) 58 min_fold_change=$method.min_fold_change)
60 59
61 #else if $method.method == "pp.highly_variable_genes" 60 #else if $method.method == "pp.highly_variable_genes"
62 sc.pp.highly_variable_genes( 61 sc.pp.highly_variable_genes(
63 adata=adata, 62 adata=adata,
64 flavor='$method.flavor.flavor', 63 flavor='$method.flavor.flavor',
65 #if $method.flavor.flavor == 'seurat' 64 #if $method.flavor.flavor == 'seurat'
66 #if str($method.flavor.min_mean) != '' 65 min_mean=$method.flavor.min_mean,
67 min_mean=$method.flavor.min_mean, 66 max_mean=$method.flavor.max_mean,
68 #end if 67 min_disp=$method.flavor.min_disp,
69 #if str($method.flavor.max_mean) != ''
70 max_mean=$method.flavor.max_mean,
71 #end if
72 #if str($method.flavor.min_disp) != ''
73 min_disp=$method.flavor.min_disp,
74 #end if
75 #if str($method.flavor.max_disp) != '' 68 #if str($method.flavor.max_disp) != ''
76 max_disp=$method.flavor.max_disp, 69 max_disp=$method.flavor.max_disp,
77 #end if 70 #end if
78 #else if $method.flavor.flavor == 'cell_ranger' 71 #else if $method.flavor.flavor == 'cell_ranger'
79 n_top_genes=$method.flavor.n_top_genes, 72 n_top_genes=$method.flavor.n_top_genes,
80 #end if 73 #end if
81 n_bins=$method.n_bins, 74 n_bins=$method.n_bins,
103 total_counts=$method.total_counts, 96 total_counts=$method.total_counts,
104 #end if 97 #end if
105 random_state=$method.random_state, 98 random_state=$method.random_state,
106 replace=$method.replace, 99 replace=$method.replace,
107 copy=False) 100 copy=False)
101
102 #else if $method.method == "filter_marker"
103
104 #if $method.layer_selection.use_raw == 'False':
105 adata.X = adata.layers['$method.layer_selection.layer']
106 #end if
107
108 def check_marker(adata, group, gene, thresh_mean, thresh_frac, groupby):
109 filtered_data = adata[adata.obs[groupby] == group, adata.var_names == gene]
110 mean_expression = np.mean(filtered_data.X)
111 frac_cell_mean_expression = len(filtered_data.X[filtered_data.X > mean_expression]) / filtered_data.n_obs
112 if ( mean_expression > thresh_mean and frac_cell_mean_expression >= thresh_frac ):
113 return(True)
114 return(False)
115
116 header='infer'
117
118 #if $method.header == 'not_included':
119 header=None
120 #end if
121
122 marker_list={key: list(value.values()) for key, value in pd.read_csv('$method.markerfile', sep='\t', index_col=0, header=header).to_dict(orient='index').items()}
123
124 for key, value in marker_list.items():
125 marker_list[key] = [x for x in value if check_marker(adata, key, x, $method.thresh_mean, $method.thresh_frac, '$method.groupby')]
126
127 # Find the maximum length of lists
128 max_len = max(len(lst) for lst in marker_list.values())
129
130 # Fill smaller lists with empty values
131 for key, value in marker_list.items():
132 marker_list[key] = value + [''] * (max_len - len(value))
133
134 df = pd.DataFrame(marker_list).T
135 df.to_csv('marker.tsv', sep='\t', index=True)
108 #end if 136 #end if
109 137
110 @CMD_anndata_write_outputs@ 138 @CMD_anndata_write_outputs@
111 ]]></configfile> 139 ]]></configfile>
112 </configfiles> 140 </configfiles>
118 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using 'pp.filter_genes'</option> 146 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using 'pp.filter_genes'</option>
119 <option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories, using 'tl.filter_rank_genes_groups'</option> 147 <option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories, using 'tl.filter_rank_genes_groups'</option>
120 <option value="pp.highly_variable_genes">Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'</option> 148 <option value="pp.highly_variable_genes">Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'</option>
121 <option value="pp.subsample">Subsample to a fraction of the number of observations, using 'pp.subsample'</option> 149 <option value="pp.subsample">Subsample to a fraction of the number of observations, using 'pp.subsample'</option>
122 <option value="pp.downsample_counts">Downsample counts from count matrix, using 'pp.downsample_counts'</option> 150 <option value="pp.downsample_counts">Downsample counts from count matrix, using 'pp.downsample_counts'</option>
151 <option value="filter_marker">Filter markers from count matrix and marker list</option>
123 </param> 152 </param>
124 <when value="pp.filter_cells"> 153 <when value="pp.filter_cells">
125 <conditional name="filter"> 154 <conditional name="filter">
126 <param argument="filter" type="select" label="Filter"> 155 <param argument="filter" type="select" label="Filter">
127 <option value="min_counts">Minimum number of counts</option> 156 <option value="min_counts">Minimum number of counts</option>
171 </param> 200 </param>
172 <param argument="groupby" type="text" optional="true" label="The key of the observations grouping to consider"> 201 <param argument="groupby" type="text" optional="true" label="The key of the observations grouping to consider">
173 <expand macro="sanitize_query" /> 202 <expand macro="sanitize_query" />
174 </param> 203 </param>
175 <expand macro="param_use_raw"/> 204 <expand macro="param_use_raw"/>
176 <expand macro="param_log"/>
177 <param argument="key_added" type="text" value="rank_genes_groups_filtered" label="Key that will contain new values"> 205 <param argument="key_added" type="text" value="rank_genes_groups_filtered" label="Key that will contain new values">
178 <expand macro="sanitize_query" /> 206 <expand macro="sanitize_query" />
179 </param> 207 </param>
180 <param argument="min_in_group_fraction" type="float" min="0" max="1" value="0.25" label="Minimum fraction of genes expressing the gene within the categories"/> 208 <param argument="min_in_group_fraction" type="float" min="0" max="1" value="0.25" label="Minimum fraction of genes expressing the gene within the categories"/>
181 <param argument="max_out_group_fraction" type="float" min="0" max="1" value="0.5" label="Maximum fraction of genes expressing the gene outside the categories"/> 209 <param argument="max_out_group_fraction" type="float" min="0" max="1" value="0.5" label="Maximum fraction of genes expressing the gene outside the categories"/>
219 <param argument="counts_per_cell" type="integer" min="0" optional="true" label="Target total counts per cell" help="If a cell has more than ‘counts_per_cell’, it will be downsampled to this number. Resulting counts can be specified on a per cell basis by passing an array."/> 247 <param argument="counts_per_cell" type="integer" min="0" optional="true" label="Target total counts per cell" help="If a cell has more than ‘counts_per_cell’, it will be downsampled to this number. Resulting counts can be specified on a per cell basis by passing an array."/>
220 <param argument="total_counts" type="integer" min="0" optional="true" label="Target total counts" help="If the count matrix has more than total_counts it will be downsampled to have this number."/> 248 <param argument="total_counts" type="integer" min="0" optional="true" label="Target total counts" help="If the count matrix has more than total_counts it will be downsampled to have this number."/>
221 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/> 249 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/>
222 <param argument="replace" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Sample the counts with replacement?"/> 250 <param argument="replace" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Sample the counts with replacement?"/>
223 </when> 251 </when>
252 <when value="filter_marker">
253 <param argument="markerfile" type="data" format="tabular" label="List of markers" help="This should be a tsv where row = group (e.g. celltypes) and columns = markers."></param>
254 <param name="header" type="select" label="Header in the list of markers?">
255 <option value="included">Header included</option>
256 <option value="not_included">Header not included</option>
257 </param>
258 <param argument="thresh_mean" type="float" min="0.0" value="1.0" label="Minimal average count of all cells of a group (e.g., celltype) for a particular marker" help="Increasing the threshold will result in a smaller marker set."/>
259 <param argument="thresh_frac" type="float" min="0.0" max="1.0" value="0.1" label="Minimal fraction of cells that have a higher count than the average count of all cells of the group for the marker" help="Increasing this threshold might remove marker outliers."/>
260 <conditional name="layer_selection">
261 <param name="use_raw" type="select" label="Use .X of adata to perform the filtering" help="">
262 <option value="True">Yes</option>
263 <option value="False">No</option>
264 </param>
265 <when value="False">
266 <param argument="layer" type="text" value="" label="Key from adata.layers whose value will be used to filter" help="If layers specified then use adata.layers[layer]."/>
267 </when>
268 <when value="True"/>
269 </conditional>
270 <param argument="groupby" type="text" value="" label="The key of the observation grouping to consider (e.g., celltype)" help="">
271 <expand macro="sanitize_query" />
272 </param>
273 </when>
224 </conditional> 274 </conditional>
225 <expand macro="inputs_common_advanced"/> 275 <expand macro="inputs_common_advanced"/>
226 </inputs> 276 </inputs>
227 <outputs> 277 <outputs>
228 <expand macro="anndata_outputs"/> 278 <expand macro="anndata_outputs"/>
279 <data name="marker_out" format="tabular" from_work_dir="marker.tsv" label="${tool.name} on ${on_string}: Markers">
280 <filter>method['method'] == 'filter_marker'</filter>
281 </data>
229 </outputs> 282 </outputs>
230 <tests> 283 <tests>
231 <test> 284 <test expect_num_outputs="2">
232 <!-- test 0 --> 285 <!-- test 1 -->
233 <param name="adata" value="krumsiek11.h5ad" /> 286 <param name="adata" value="krumsiek11.h5ad" />
234 <conditional name="method"> 287 <conditional name="method">
235 <param name="method" value="pp.filter_cells"/> 288 <param name="method" value="pp.filter_cells"/>
236 <conditional name="filter"> 289 <conditional name="filter">
237 <param name="filter" value="min_counts"/> 290 <param name="filter" value="min_counts"/>
250 <has_text_matching expression="min_counts=3"/> 303 <has_text_matching expression="min_counts=3"/>
251 </assert_contents> 304 </assert_contents>
252 </output> 305 </output>
253 <output name="anndata_out" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/> 306 <output name="anndata_out" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/>
254 </test> 307 </test>
255 <test> 308 <test expect_num_outputs="2">
256 <!-- test 1 --> 309 <!-- test 2 -->
257 <param name="adata" value="krumsiek11.h5ad" /> 310 <param name="adata" value="krumsiek11.h5ad" />
258 <conditional name="method"> 311 <conditional name="method">
259 <param name="method" value="pp.filter_cells"/> 312 <param name="method" value="pp.filter_cells"/>
260 <conditional name="filter"> 313 <conditional name="filter">
261 <param name="filter" value="max_genes"/> 314 <param name="filter" value="max_genes"/>
272 <has_text_matching expression="max_genes=100"/> 325 <has_text_matching expression="max_genes=100"/>
273 </assert_contents> 326 </assert_contents>
274 </output> 327 </output>
275 <output name="anndata_out" file="pp.filter_cells.krumsiek11-max_genes.h5ad" ftype="h5ad" compare="sim_size"/> 328 <output name="anndata_out" file="pp.filter_cells.krumsiek11-max_genes.h5ad" ftype="h5ad" compare="sim_size"/>
276 </test> 329 </test>
277 <test> 330 <test expect_num_outputs="2">
278 <!-- test 2 --> 331 <!-- test 3 -->
279 <param name="adata" value="krumsiek11.h5ad" /> 332 <param name="adata" value="krumsiek11.h5ad" />
280 <conditional name="method"> 333 <conditional name="method">
281 <param name="method" value="pp.filter_genes"/> 334 <param name="method" value="pp.filter_genes"/>
282 <conditional name="filter"> 335 <conditional name="filter">
283 <param name="filter" value="min_counts"/> 336 <param name="filter" value="min_counts"/>
293 <has_text_matching expression="min_counts=3"/> 346 <has_text_matching expression="min_counts=3"/>
294 </assert_contents> 347 </assert_contents>
295 </output> 348 </output>
296 <output name="anndata_out" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/> 349 <output name="anndata_out" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/>
297 </test> 350 </test>
298 <!-- <test> --> 351
299 <!-- <!-\- test 3 -\-> --> 352 <!-- test 4 -->
300 <!-- <!-\- Input dataset appears to be missing rank_genes_groups key... -\-> --> 353 <!-- Fails to write to anndata after tl.filter_rank_genes_groups
301 <!-- <param name="adata" value="tl.rank_genes_groups.krumsiek11.h5ad" /> --> 354 Issue has been reported here: https://github.com/scverse/anndata/issues/726
302 <!-- <conditional name="method"> --> 355 The current fix is: del adata.uns['rank_genes_groups_filtered'] -->
303 <!-- <param name="method" value="tl.filter_rank_genes_groups"/> --> 356 <!--<test expect_num_outputs="2">
304 <!-- <param name="key" value="rank_genes_groups"/> --> 357 <param name="adata" value="tl.rank_genes_groups.krumsiek11.h5ad" />
305 <!-- <param name="use_raw" value="False"/> --> 358 <conditional name="method">
306 <!-- <param name="log" value="False"/> --> 359 <param name="method" value="tl.filter_rank_genes_groups"/>
307 <!-- <param name="key_added" value="rank_genes_groups_filtered"/> --> 360 <param name="key" value="rank_genes_groups"/>
308 <!-- <param name="min_in_group_fraction" value="0.25"/> --> 361 <param name="use_raw" value="False"/>
309 <!-- <param name="max_out_group_fraction" value="0.5"/> --> 362 <param name="key_added" value="rank_genes_groups_filtered"/>
310 <!-- <param name="min_fold_change" value="3"/> --> 363 <param name="min_in_group_fraction" value="0.25"/>
311 <!-- </conditional> --> 364 <param name="max_out_group_fraction" value="0.5"/>
312 <!-- <output name="hidden_output"> --> 365 <param name="min_fold_change" value="3"/>
313 <!-- <assert_contents> --> 366 </conditional>
314 <!-- <has_text_matching expression="tl.filter_rank_genes_groups"/> --> 367 <section name="advanced_common">
315 <!-- <has_text_matching expression="key='rank_genes_groups'"/> --> 368 <param name="show_log" value="true" />
316 <!-- <has_text_matching expression="use_raw=False"/> --> 369 </section>
317 <!-- <has_text_matching expression="log=False"/> --> 370 <output name="hidden_output">
318 <!-- <has_text_matching expression="key_added='rank_genes_groups_filtered'"/> --> 371 <assert_contents>
319 <!-- <has_text_matching expression="min_in_group_fraction=0.25"/> --> 372 <has_text_matching expression="tl.filter_rank_genes_groups"/>
320 <!-- <has_text_matching expression="max_out_group_fraction=0.5"/> --> 373 <has_text_matching expression="key='rank_genes_groups'"/>
321 <!-- <has_text_matching expression="min_fold_change=3"/> --> 374 <has_text_matching expression="use_raw=False"/>
322 <!-- </assert_contents> --> 375 <has_text_matching expression="log=False"/>
323 <!-- </output> --> 376 <has_text_matching expression="key_added='rank_genes_groups_filtered'"/>
324 <!-- <output name="anndata_out" file="pp.filter_rank_genes_groups.h5ad" ftype="h5ad" compare="sim_size"/> --> 377 <has_text_matching expression="min_in_group_fraction=0.25"/>
325 <!-- </test> --> 378 <has_text_matching expression="max_out_group_fraction=0.5"/>
326 <test> 379 <has_text_matching expression="min_fold_change=3"/>
327 <!-- test 4 --> 380 </assert_contents>
381 </output>
382 <output name="anndata_out" file="pp.filter_rank_genes_groups.h5ad" ftype="h5ad" compare="sim_size"/>
383 </test>-->
384 <test expect_num_outputs="2">
385 <!-- test 5 -->
328 <param name="adata" value="blobs.h5ad"/> 386 <param name="adata" value="blobs.h5ad"/>
329 <conditional name="method"> 387 <conditional name="method">
330 <param name="method" value="pp.highly_variable_genes"/> 388 <param name="method" value="pp.highly_variable_genes"/>
331 <conditional name="flavor"> 389 <conditional name="flavor">
332 <param name="flavor" value="seurat"/> 390 <param name="flavor" value="seurat"/>
351 <has_text_matching expression="subset=False"/> 409 <has_text_matching expression="subset=False"/>
352 </assert_contents> 410 </assert_contents>
353 </output> 411 </output>
354 <output name="anndata_out" file="pp.highly_variable_genes.seurat.blobs.h5ad" ftype="h5ad" compare="sim_size" delta="100000" delta_frac="0.2"/> 412 <output name="anndata_out" file="pp.highly_variable_genes.seurat.blobs.h5ad" ftype="h5ad" compare="sim_size" delta="100000" delta_frac="0.2"/>
355 </test> 413 </test>
356 <test> 414 <test expect_num_outputs="2">
357 <!-- test 5 --> 415 <!-- test 6 -->
358 <param name="adata" value="krumsiek11.h5ad" /> 416 <param name="adata" value="krumsiek11.h5ad" />
359 <conditional name="method"> 417 <conditional name="method">
360 <param name="method" value="pp.highly_variable_genes"/> 418 <param name="method" value="pp.highly_variable_genes"/>
361 <conditional name="flavor"> 419 <conditional name="flavor">
362 <param name="flavor" value="cell_ranger"/> 420 <param name="flavor" value="cell_ranger"/>
377 <has_text_matching expression="subset=True"/> 435 <has_text_matching expression="subset=True"/>
378 </assert_contents> 436 </assert_contents>
379 </output> 437 </output>
380 <output name="anndata_out" file="pp.highly_variable_genes.krumsiek11-cell_ranger.h5ad" ftype="h5ad" compare="sim_size" delta="100000" delta_frac="0.9"/> 438 <output name="anndata_out" file="pp.highly_variable_genes.krumsiek11-cell_ranger.h5ad" ftype="h5ad" compare="sim_size" delta="100000" delta_frac="0.9"/>
381 </test> 439 </test>
382 <test> 440 <test expect_num_outputs="2">
383 <!-- test 6 --> 441 <!-- test 7 -->
384 <param name="adata" value="krumsiek11.h5ad" /> 442 <param name="adata" value="krumsiek11.h5ad" />
385 <conditional name="method"> 443 <conditional name="method">
386 <param name="method" value="pp.subsample"/> 444 <param name="method" value="pp.subsample"/>
387 <conditional name="type"> 445 <conditional name="type">
388 <param name="type" value="fraction" /> 446 <param name="type" value="fraction" />
400 <has_text_matching expression="random_state=0"/> 458 <has_text_matching expression="random_state=0"/>
401 </assert_contents> 459 </assert_contents>
402 </output> 460 </output>
403 <output name="anndata_out" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5ad" compare="sim_size"/> 461 <output name="anndata_out" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5ad" compare="sim_size"/>
404 </test> 462 </test>
405 <test> 463 <test expect_num_outputs="2">
406 <!-- test 7 --> 464 <!-- test 8 -->
407 <param name="adata" value="krumsiek11.h5ad" /> 465 <param name="adata" value="krumsiek11.h5ad" />
408 <conditional name="method"> 466 <conditional name="method">
409 <param name="method" value="pp.subsample"/> 467 <param name="method" value="pp.subsample"/>
410 <conditional name="type"> 468 <conditional name="type">
411 <param name="type" value="n_obs" /> 469 <param name="type" value="n_obs" />
423 <has_text_matching expression="random_state=0"/> 481 <has_text_matching expression="random_state=0"/>
424 </assert_contents> 482 </assert_contents>
425 </output> 483 </output>
426 <output name="anndata_out" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5ad" compare="sim_size"/> 484 <output name="anndata_out" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5ad" compare="sim_size"/>
427 </test> 485 </test>
428 <test> 486 <test expect_num_outputs="2">
429 <!-- test 8 --> 487 <!-- test 9 -->
430 <param name="adata" value="random-randint.h5ad" /> 488 <param name="adata" value="random-randint.h5ad" />
431 <conditional name="method"> 489 <conditional name="method">
432 <param name="method" value="pp.downsample_counts"/> 490 <param name="method" value="pp.downsample_counts"/>
433 <param name="total_counts" value="20000"/> 491 <param name="total_counts" value="20000"/>
434 <param name="random_state" value="0"/> 492 <param name="random_state" value="0"/>
445 <has_text_matching expression="replace=False"/> 503 <has_text_matching expression="replace=False"/>
446 </assert_contents> 504 </assert_contents>
447 </output> 505 </output>
448 <output name="anndata_out" file="pp.downsample_counts.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="10000000" delta_frac="0.5"/> 506 <output name="anndata_out" file="pp.downsample_counts.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="10000000" delta_frac="0.5"/>
449 </test> 507 </test>
508 <test expect_num_outputs="3">
509 <!-- test 10 -->
510 <param name="adata" value="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1.h5ad" />
511 <conditional name="method">
512 <param name="method" value="filter_marker"/>
513 <param name="markerfile" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_1.tsv"/>
514 <param name="thresh_mean" value="1.0"/>
515 <param name="thresh_frac" value="0.2"/>
516 <param name="layer_selection" value="True"/>
517 <param name="groupby" value="bulk_labels"/>
518 </conditional>
519 <section name="advanced_common">
520 <param name="show_log" value="true" />
521 </section>
522 <output name="hidden_output">
523 <assert_contents>
524 <has_text_matching expression="adata, key, x, 1.0, 0.2, 'bulk_labels'"/>
525 </assert_contents>
526 </output>
527 <output name="anndata_out" file="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1.h5ad" ftype="h5ad">
528 <assert_contents>
529 <has_h5_keys keys="obs, var, uns" />
530 </assert_contents>
531 </output>
532 <output name="marker_out" file="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_filtered_1.tsv" ftype="tabular" compare="sim_size"/>
533 </test>
450 </tests> 534 </tests>
451 <help><![CDATA[ 535 <help><![CDATA[
452 536
453 Filter cells outliers based on counts and numbers of genes expressed (`pp.filter_cells`) 537 Filter cells outliers based on counts and numbers of genes expressed (`pp.filter_cells`)
454 ======================================================================================== 538 ========================================================================================
459 543
460 Only provide one of the optional parameters `min_counts`, `min_genes`, 544 Only provide one of the optional parameters `min_counts`, `min_genes`,
461 `max_counts`, `max_genes` per call. 545 `max_counts`, `max_genes` per call.
462 546
463 More details on the `scanpy documentation 547 More details on the `scanpy documentation
464 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.filter_cells.html>`__ 548 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.filter_cells.html>`__
465 549
466 550
467 Filter genes based on number of cells or counts (`pp.filter_genes`) 551 Filter genes based on number of cells or counts (`pp.filter_genes`)
468 =================================================================== 552 ===================================================================
469 553
473 557
474 Only provide one of the optional parameters `min_counts`, `min_cells`, 558 Only provide one of the optional parameters `min_counts`, `min_cells`,
475 `max_counts`, `max_cells` per call. 559 `max_counts`, `max_cells` per call.
476 560
477 More details on the `scanpy documentation 561 More details on the `scanpy documentation
478 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.filter_genes.html>`__ 562 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.filter_genes.html>`__
479 563
480 564
481 Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories (`tl.filter_rank_genes_groups`) 565 Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories (`tl.filter_rank_genes_groups`)
482 ========================================================================================================================================================== 566 ==========================================================================================================================================================
483 567
484 More details on the `scanpy documentation 568 More details on the `scanpy documentation
485 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.tl.filter_rank_genes_groups.html>`__ 569 <https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.filter_rank_genes_groups.html>`__
486 570
487 571
488 Annotate highly variable genes (`pp.highly_variable_genes`) 572 Annotate highly variable genes (`pp.highly_variable_genes`)
489 =========================================================== 573 ===========================================================
490 574
495 579
496 Subsample to a fraction of the number of observations (`pp.subsample`) 580 Subsample to a fraction of the number of observations (`pp.subsample`)
497 ====================================================================== 581 ======================================================================
498 582
499 More details on the `scanpy documentation 583 More details on the `scanpy documentation
500 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.subsample.html>`__ 584 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.subsample.html>`__
501 585
502 Downsample counts (`pp.downsample_counts`) 586 Downsample counts (`pp.downsample_counts`)
503 ========================================== 587 ==========================================
504 588
505 Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This 589 Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This
506 has been implemented by M. D. Luecken. 590 has been implemented by M. D. Luecken.
507 591
592
593 Filter marker genes (`filter_marker`)
594 ======================================================================
595
596 This option is specific for celltype marker gene detection. You can generate a celltype marker gene file (tsv) with **COSG** provided at Galaxy.
597
598 The marker gene file should have cell types as rows and marker genes as columns. Each cell type can have a varying number of marker genes.
599
600 A marker gene is returned (retained in the list) if its mean expression across the cells of the group is greater than the mean expression threshold (thresh_mean) and if the fraction of cells in the group whose expression of the marker gene exceeds that mean is equal to or higher than the cell fraction threshold (thresh_frac).
601
508 More details on the `scanpy documentation 602 More details on the `scanpy documentation
509 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.downsample_counts.html>`__ 603 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.downsample_counts.html>`__
510 604
511 605
512 ]]></help> 606 ]]></help>
513 <expand macro="citations"/> 607 <expand macro="citations"/>
514 </tool> 608 </tool>