Mercurial > repos > iuc > scanpy_filter
comparison filter.xml @ 12:d600e0947468 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit c21958f44b81d740191999fb6015d5ae69538ee0
| author | iuc |
|---|---|
| date | Wed, 31 Jul 2024 18:06:35 +0000 |
| parents | 97b82bb0bb7e |
| children | e299752da98e |
comparison
equal
deleted
inserted
replaced
| 11:c7ccb6ba94fb | 12:d600e0947468 |
|---|---|
| 1 <tool id="scanpy_filter" name="Filter" version="@galaxy_version@" profile="@profile@"> | 1 <tool id="scanpy_filter" name="Filter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@"> |
| 2 <description>with scanpy</description> | 2 <description>with scanpy</description> |
| 3 <expand macro="bio_tools"/> | |
| 4 <macros> | 3 <macros> |
| 5 <import>macros.xml</import> | 4 <import>macros.xml</import> |
| 6 </macros> | 5 </macros> |
| 6 <expand macro="bio_tools"/> | |
| 7 <expand macro="requirements"/> | 7 <expand macro="requirements"/> |
| 8 <expand macro="version_command"/> | 8 <expand macro="version_command"/> |
| 9 <command detect_errors="exit_code"><![CDATA[ | 9 <command detect_errors="exit_code"><![CDATA[ |
| 10 @CMD@ | 10 @CMD@ |
| 11 ]]></command> | 11 ]]></command> |
| 43 copy=False) | 43 copy=False) |
| 44 | 44 |
| 45 #else if $method.method == 'tl.filter_rank_genes_groups' | 45 #else if $method.method == 'tl.filter_rank_genes_groups' |
| 46 sc.tl.filter_rank_genes_groups( | 46 sc.tl.filter_rank_genes_groups( |
| 47 adata, | 47 adata, |
| 48 #if str($method.key) != '' | 48 #if $method.key |
| 49 key='$method.key', | 49 key='$method.key', |
| 50 #end if | 50 #end if |
| 51 #if str($method.groupby) != '' | 51 #if $method.groupby |
| 52 groupby='$method.groupby', | 52 groupby='$method.groupby', |
| 53 #end if | 53 #end if |
| 54 use_raw=$method.use_raw, | 54 use_raw=$method.use_raw, |
| 55 log=$method.log, | |
| 56 key_added='$method.key_added', | 55 key_added='$method.key_added', |
| 57 min_in_group_fraction=$method.min_in_group_fraction, | 56 min_in_group_fraction=$method.min_in_group_fraction, |
| 58 max_out_group_fraction=$method.max_out_group_fraction, | 57 max_out_group_fraction=$method.max_out_group_fraction, |
| 59 min_fold_change=$method.min_fold_change) | 58 min_fold_change=$method.min_fold_change) |
| 60 | 59 |
| 61 #else if $method.method == "pp.highly_variable_genes" | 60 #else if $method.method == "pp.highly_variable_genes" |
| 62 sc.pp.highly_variable_genes( | 61 sc.pp.highly_variable_genes( |
| 63 adata=adata, | 62 adata=adata, |
| 64 flavor='$method.flavor.flavor', | 63 flavor='$method.flavor.flavor', |
| 65 #if $method.flavor.flavor == 'seurat' | 64 #if $method.flavor.flavor == 'seurat' |
| 66 #if str($method.flavor.min_mean) != '' | 65 min_mean=$method.flavor.min_mean, |
| 67 min_mean=$method.flavor.min_mean, | 66 max_mean=$method.flavor.max_mean, |
| 68 #end if | 67 min_disp=$method.flavor.min_disp, |
| 69 #if str($method.flavor.max_mean) != '' | |
| 70 max_mean=$method.flavor.max_mean, | |
| 71 #end if | |
| 72 #if str($method.flavor.min_disp) != '' | |
| 73 min_disp=$method.flavor.min_disp, | |
| 74 #end if | |
| 75 #if str($method.flavor.max_disp) != '' | 68 #if str($method.flavor.max_disp) != '' |
| 76 max_disp=$method.flavor.max_disp, | 69 max_disp=$method.flavor.max_disp, |
| 77 #end if | 70 #end if |
| 78 #else if $method.flavor.flavor == 'cell_ranger' | 71 #else if $method.flavor.flavor == 'cell_ranger' |
| 79 n_top_genes=$method.flavor.n_top_genes, | 72 n_top_genes=$method.flavor.n_top_genes, |
| 80 #end if | 73 #end if |
| 81 n_bins=$method.n_bins, | 74 n_bins=$method.n_bins, |
| 103 total_counts=$method.total_counts, | 96 total_counts=$method.total_counts, |
| 104 #end if | 97 #end if |
| 105 random_state=$method.random_state, | 98 random_state=$method.random_state, |
| 106 replace=$method.replace, | 99 replace=$method.replace, |
| 107 copy=False) | 100 copy=False) |
| 101 | |
| 102 #else if $method.method == "filter_marker" | |
| 103 | |
| 104 #if $method.layer_selection.use_raw == 'False': | |
| 105 adata.X = adata.layers['$method.layer_selection.layer'] | |
| 106 #end if | |
| 107 | |
| 108 def check_marker(adata, group, gene, thresh_mean, thresh_frac, groupby): | |
| 109 filtered_data = adata[adata.obs[groupby] == group, adata.var_names == gene] | |
| 110 mean_expression = np.mean(filtered_data.X) | |
| 111 frac_cell_mean_expression = len(filtered_data.X[filtered_data.X > mean_expression]) / filtered_data.n_obs | |
| 112 if ( mean_expression > thresh_mean and frac_cell_mean_expression >= thresh_frac ): | |
| 113 return(True) | |
| 114 return(False) | |
| 115 | |
| 116 header='infer' | |
| 117 | |
| 118 #if $method.header == 'not_included': | |
| 119 header=None | |
| 120 #end if | |
| 121 | |
| 122 marker_list={key: list(value.values()) for key, value in pd.read_csv('$method.markerfile', sep='\t', index_col=0, header=header).to_dict(orient='index').items()} | |
| 123 | |
| 124 for key, value in marker_list.items(): | |
| 125 marker_list[key] = [x for x in value if check_marker(adata, key, x, $method.thresh_mean, $method.thresh_frac, '$method.groupby')] | |
| 126 | |
| 127 # Find the maximum length of lists | |
| 128 max_len = max(len(lst) for lst in marker_list.values()) | |
| 129 | |
| 130 # Fill smaller lists with empty values | |
| 131 for key, value in marker_list.items(): | |
| 132 marker_list[key] = value + [''] * (max_len - len(value)) | |
| 133 | |
| 134 df = pd.DataFrame(marker_list).T | |
| 135 df.to_csv('marker.tsv', sep='\t', index=True) | |
| 108 #end if | 136 #end if |
| 109 | 137 |
| 110 @CMD_anndata_write_outputs@ | 138 @CMD_anndata_write_outputs@ |
| 111 ]]></configfile> | 139 ]]></configfile> |
| 112 </configfiles> | 140 </configfiles> |
| 118 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using 'pp.filter_genes'</option> | 146 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using 'pp.filter_genes'</option> |
| 119 <option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories, using 'tl.filter_rank_genes_groups'</option> | 147 <option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories, using 'tl.filter_rank_genes_groups'</option> |
| 120 <option value="pp.highly_variable_genes">Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'</option> | 148 <option value="pp.highly_variable_genes">Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'</option> |
| 121 <option value="pp.subsample">Subsample to a fraction of the number of observations, using 'pp.subsample'</option> | 149 <option value="pp.subsample">Subsample to a fraction of the number of observations, using 'pp.subsample'</option> |
| 122 <option value="pp.downsample_counts">Downsample counts from count matrix, using 'pp.downsample_counts'</option> | 150 <option value="pp.downsample_counts">Downsample counts from count matrix, using 'pp.downsample_counts'</option> |
| 151 <option value="filter_marker">Filter markers from count matrix and marker list</option> | |
| 123 </param> | 152 </param> |
| 124 <when value="pp.filter_cells"> | 153 <when value="pp.filter_cells"> |
| 125 <conditional name="filter"> | 154 <conditional name="filter"> |
| 126 <param argument="filter" type="select" label="Filter"> | 155 <param argument="filter" type="select" label="Filter"> |
| 127 <option value="min_counts">Minimum number of counts</option> | 156 <option value="min_counts">Minimum number of counts</option> |
| 171 </param> | 200 </param> |
| 172 <param argument="groupby" type="text" optional="true" label="The key of the observations grouping to consider"> | 201 <param argument="groupby" type="text" optional="true" label="The key of the observations grouping to consider"> |
| 173 <expand macro="sanitize_query" /> | 202 <expand macro="sanitize_query" /> |
| 174 </param> | 203 </param> |
| 175 <expand macro="param_use_raw"/> | 204 <expand macro="param_use_raw"/> |
| 176 <expand macro="param_log"/> | |
| 177 <param argument="key_added" type="text" value="rank_genes_groups_filtered" label="Key that will contain new values"> | 205 <param argument="key_added" type="text" value="rank_genes_groups_filtered" label="Key that will contain new values"> |
| 178 <expand macro="sanitize_query" /> | 206 <expand macro="sanitize_query" /> |
| 179 </param> | 207 </param> |
| 180 <param argument="min_in_group_fraction" type="float" min="0" max="1" value="0.25" label="Minimum fraction of genes expressing the gene within the categories"/> | 208 <param argument="min_in_group_fraction" type="float" min="0" max="1" value="0.25" label="Minimum fraction of genes expressing the gene within the categories"/> |
| 181 <param argument="max_out_group_fraction" type="float" min="0" max="1" value="0.5" label="Maximum fraction of genes expressing the gene outside the categories"/> | 209 <param argument="max_out_group_fraction" type="float" min="0" max="1" value="0.5" label="Maximum fraction of genes expressing the gene outside the categories"/> |
| 219 <param argument="counts_per_cell" type="integer" min="0" optional="true" label="Target total counts per cell" help="If a cell has more than ‘counts_per_cell’, it will be downsampled to this number. Resulting counts can be specified on a per cell basis by passing an array."/> | 247 <param argument="counts_per_cell" type="integer" min="0" optional="true" label="Target total counts per cell" help="If a cell has more than ‘counts_per_cell’, it will be downsampled to this number. Resulting counts can be specified on a per cell basis by passing an array."/> |
| 220 <param argument="total_counts" type="integer" min="0" optional="true" label="Target total counts" help="If the count matrix has more than total_counts it will be downsampled to have this number."/> | 248 <param argument="total_counts" type="integer" min="0" optional="true" label="Target total counts" help="If the count matrix has more than total_counts it will be downsampled to have this number."/> |
| 221 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/> | 249 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/> |
| 222 <param argument="replace" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Sample the counts with replacement?"/> | 250 <param argument="replace" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Sample the counts with replacement?"/> |
| 223 </when> | 251 </when> |
| 252 <when value="filter_marker"> | |
| 253 <param argument="markerfile" type="data" format="tabular" label="List of markers" help="This should be a tsv where row = group (e.g. celltypes) and columns = markers."></param> | |
| 254 <param name="header" type="select" label="Header in the list of markers?"> | |
| 255 <option value="included">Header included</option> | |
| 256 <option value="not_included">Header not included</option> | |
| 257 </param> | |
| 258 <param argument="thresh_mean" type="float" min="0.0" value="1.0" label="Minimal average count of all cells of a group (e.g., celltype) for a particular marker" help="Increasing the threshold will result in a smaller marker set."/> | |
| 259 <param argument="thresh_frac" type="float" min="0.0" max="1.0" value="0.1" label="Minimal fraction of cells that have a higher count than the average count of all cells of the group for the marker" help="Increasing this threshold might remove marker outliers."/> | |
| 260 <conditional name="layer_selection"> | |
| 261 <param name="use_raw" type="select" label="Use .X of adata to perform the filtering" help=""> | |
| 262 <option value="True">Yes</option> | |
| 263 <option value="False">No</option> | |
| 264 </param> | |
| 265 <when value="False"> | |
| 266 <param argument="layer" type="text" value="" label="Key from adata.layers whose value will be used to filter" help="If layers specified then use adata.layers[layer]."/> | |
| 267 </when> | |
| 268 <when value="True"/> | |
| 269 </conditional> | |
| 270 <param argument="groupby" type="text" value="" label="The key of the observation grouping to consider (e.g., celltype)" help=""> | |
| 271 <expand macro="sanitize_query" /> | |
| 272 </param> | |
| 273 </when> | |
| 224 </conditional> | 274 </conditional> |
| 225 <expand macro="inputs_common_advanced"/> | 275 <expand macro="inputs_common_advanced"/> |
| 226 </inputs> | 276 </inputs> |
| 227 <outputs> | 277 <outputs> |
| 228 <expand macro="anndata_outputs"/> | 278 <expand macro="anndata_outputs"/> |
| 279 <data name="marker_out" format="tabular" from_work_dir="marker.tsv" label="${tool.name} on ${on_string}: Markers"> | |
| 280 <filter>method['method'] == 'filter_marker'</filter> | |
| 281 </data> | |
| 229 </outputs> | 282 </outputs> |
| 230 <tests> | 283 <tests> |
| 231 <test> | 284 <test expect_num_outputs="2"> |
| 232 <!-- test 0 --> | 285 <!-- test 1 --> |
| 233 <param name="adata" value="krumsiek11.h5ad" /> | 286 <param name="adata" value="krumsiek11.h5ad" /> |
| 234 <conditional name="method"> | 287 <conditional name="method"> |
| 235 <param name="method" value="pp.filter_cells"/> | 288 <param name="method" value="pp.filter_cells"/> |
| 236 <conditional name="filter"> | 289 <conditional name="filter"> |
| 237 <param name="filter" value="min_counts"/> | 290 <param name="filter" value="min_counts"/> |
| 250 <has_text_matching expression="min_counts=3"/> | 303 <has_text_matching expression="min_counts=3"/> |
| 251 </assert_contents> | 304 </assert_contents> |
| 252 </output> | 305 </output> |
| 253 <output name="anndata_out" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/> | 306 <output name="anndata_out" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/> |
| 254 </test> | 307 </test> |
| 255 <test> | 308 <test expect_num_outputs="2"> |
| 256 <!-- test 1 --> | 309 <!-- test 2 --> |
| 257 <param name="adata" value="krumsiek11.h5ad" /> | 310 <param name="adata" value="krumsiek11.h5ad" /> |
| 258 <conditional name="method"> | 311 <conditional name="method"> |
| 259 <param name="method" value="pp.filter_cells"/> | 312 <param name="method" value="pp.filter_cells"/> |
| 260 <conditional name="filter"> | 313 <conditional name="filter"> |
| 261 <param name="filter" value="max_genes"/> | 314 <param name="filter" value="max_genes"/> |
| 272 <has_text_matching expression="max_genes=100"/> | 325 <has_text_matching expression="max_genes=100"/> |
| 273 </assert_contents> | 326 </assert_contents> |
| 274 </output> | 327 </output> |
| 275 <output name="anndata_out" file="pp.filter_cells.krumsiek11-max_genes.h5ad" ftype="h5ad" compare="sim_size"/> | 328 <output name="anndata_out" file="pp.filter_cells.krumsiek11-max_genes.h5ad" ftype="h5ad" compare="sim_size"/> |
| 276 </test> | 329 </test> |
| 277 <test> | 330 <test expect_num_outputs="2"> |
| 278 <!-- test 2 --> | 331 <!-- test 3 --> |
| 279 <param name="adata" value="krumsiek11.h5ad" /> | 332 <param name="adata" value="krumsiek11.h5ad" /> |
| 280 <conditional name="method"> | 333 <conditional name="method"> |
| 281 <param name="method" value="pp.filter_genes"/> | 334 <param name="method" value="pp.filter_genes"/> |
| 282 <conditional name="filter"> | 335 <conditional name="filter"> |
| 283 <param name="filter" value="min_counts"/> | 336 <param name="filter" value="min_counts"/> |
| 293 <has_text_matching expression="min_counts=3"/> | 346 <has_text_matching expression="min_counts=3"/> |
| 294 </assert_contents> | 347 </assert_contents> |
| 295 </output> | 348 </output> |
| 296 <output name="anndata_out" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/> | 349 <output name="anndata_out" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/> |
| 297 </test> | 350 </test> |
| 298 <!-- <test> --> | 351 |
| 299 <!-- <!-\- test 3 -\-> --> | 352 <!-- test 4 --> |
| 300 <!-- <!-\- Input dataset appears to be missing rank_genes_groups key... -\-> --> | 353 <!-- Fails to write to anndata after tl.filter_rank_genes_groups |
| 301 <!-- <param name="adata" value="tl.rank_genes_groups.krumsiek11.h5ad" /> --> | 354 Issue has been reported here: https://github.com/scverse/anndata/issues/726 |
| 302 <!-- <conditional name="method"> --> | 355 The current fix is: del adata.uns['rank_genes_groups_filtered'] --> |
| 303 <!-- <param name="method" value="tl.filter_rank_genes_groups"/> --> | 356 <!--<test expect_num_outputs="2"> |
| 304 <!-- <param name="key" value="rank_genes_groups"/> --> | 357 <param name="adata" value="tl.rank_genes_groups.krumsiek11.h5ad" /> |
| 305 <!-- <param name="use_raw" value="False"/> --> | 358 <conditional name="method"> |
| 306 <!-- <param name="log" value="False"/> --> | 359 <param name="method" value="tl.filter_rank_genes_groups"/> |
| 307 <!-- <param name="key_added" value="rank_genes_groups_filtered"/> --> | 360 <param name="key" value="rank_genes_groups"/> |
| 308 <!-- <param name="min_in_group_fraction" value="0.25"/> --> | 361 <param name="use_raw" value="False"/> |
| 309 <!-- <param name="max_out_group_fraction" value="0.5"/> --> | 362 <param name="key_added" value="rank_genes_groups_filtered"/> |
| 310 <!-- <param name="min_fold_change" value="3"/> --> | 363 <param name="min_in_group_fraction" value="0.25"/> |
| 311 <!-- </conditional> --> | 364 <param name="max_out_group_fraction" value="0.5"/> |
| 312 <!-- <output name="hidden_output"> --> | 365 <param name="min_fold_change" value="3"/> |
| 313 <!-- <assert_contents> --> | 366 </conditional> |
| 314 <!-- <has_text_matching expression="tl.filter_rank_genes_groups"/> --> | 367 <section name="advanced_common"> |
| 315 <!-- <has_text_matching expression="key='rank_genes_groups'"/> --> | 368 <param name="show_log" value="true" /> |
| 316 <!-- <has_text_matching expression="use_raw=False"/> --> | 369 </section> |
| 317 <!-- <has_text_matching expression="log=False"/> --> | 370 <output name="hidden_output"> |
| 318 <!-- <has_text_matching expression="key_added='rank_genes_groups_filtered'"/> --> | 371 <assert_contents> |
| 319 <!-- <has_text_matching expression="min_in_group_fraction=0.25"/> --> | 372 <has_text_matching expression="tl.filter_rank_genes_groups"/> |
| 320 <!-- <has_text_matching expression="max_out_group_fraction=0.5"/> --> | 373 <has_text_matching expression="key='rank_genes_groups'"/> |
| 321 <!-- <has_text_matching expression="min_fold_change=3"/> --> | 374 <has_text_matching expression="use_raw=False"/> |
| 322 <!-- </assert_contents> --> | 375 <has_text_matching expression="log=False"/> |
| 323 <!-- </output> --> | 376 <has_text_matching expression="key_added='rank_genes_groups_filtered'"/> |
| 324 <!-- <output name="anndata_out" file="pp.filter_rank_genes_groups.h5ad" ftype="h5ad" compare="sim_size"/> --> | 377 <has_text_matching expression="min_in_group_fraction=0.25"/> |
| 325 <!-- </test> --> | 378 <has_text_matching expression="max_out_group_fraction=0.5"/> |
| 326 <test> | 379 <has_text_matching expression="min_fold_change=3"/> |
| 327 <!-- test 4 --> | 380 </assert_contents> |
| 381 </output> | |
| 382 <output name="anndata_out" file="pp.filter_rank_genes_groups.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 383 </test>--> | |
| 384 <test expect_num_outputs="2"> | |
| 385 <!-- test 5 --> | |
| 328 <param name="adata" value="blobs.h5ad"/> | 386 <param name="adata" value="blobs.h5ad"/> |
| 329 <conditional name="method"> | 387 <conditional name="method"> |
| 330 <param name="method" value="pp.highly_variable_genes"/> | 388 <param name="method" value="pp.highly_variable_genes"/> |
| 331 <conditional name="flavor"> | 389 <conditional name="flavor"> |
| 332 <param name="flavor" value="seurat"/> | 390 <param name="flavor" value="seurat"/> |
| 351 <has_text_matching expression="subset=False"/> | 409 <has_text_matching expression="subset=False"/> |
| 352 </assert_contents> | 410 </assert_contents> |
| 353 </output> | 411 </output> |
| 354 <output name="anndata_out" file="pp.highly_variable_genes.seurat.blobs.h5ad" ftype="h5ad" compare="sim_size" delta="100000" delta_frac="0.2"/> | 412 <output name="anndata_out" file="pp.highly_variable_genes.seurat.blobs.h5ad" ftype="h5ad" compare="sim_size" delta="100000" delta_frac="0.2"/> |
| 355 </test> | 413 </test> |
| 356 <test> | 414 <test expect_num_outputs="2"> |
| 357 <!-- test 5 --> | 415 <!-- test 6 --> |
| 358 <param name="adata" value="krumsiek11.h5ad" /> | 416 <param name="adata" value="krumsiek11.h5ad" /> |
| 359 <conditional name="method"> | 417 <conditional name="method"> |
| 360 <param name="method" value="pp.highly_variable_genes"/> | 418 <param name="method" value="pp.highly_variable_genes"/> |
| 361 <conditional name="flavor"> | 419 <conditional name="flavor"> |
| 362 <param name="flavor" value="cell_ranger"/> | 420 <param name="flavor" value="cell_ranger"/> |
| 377 <has_text_matching expression="subset=True"/> | 435 <has_text_matching expression="subset=True"/> |
| 378 </assert_contents> | 436 </assert_contents> |
| 379 </output> | 437 </output> |
| 380 <output name="anndata_out" file="pp.highly_variable_genes.krumsiek11-cell_ranger.h5ad" ftype="h5ad" compare="sim_size" delta="100000" delta_frac="0.9"/> | 438 <output name="anndata_out" file="pp.highly_variable_genes.krumsiek11-cell_ranger.h5ad" ftype="h5ad" compare="sim_size" delta="100000" delta_frac="0.9"/> |
| 381 </test> | 439 </test> |
| 382 <test> | 440 <test expect_num_outputs="2"> |
| 383 <!-- test 6 --> | 441 <!-- test 7 --> |
| 384 <param name="adata" value="krumsiek11.h5ad" /> | 442 <param name="adata" value="krumsiek11.h5ad" /> |
| 385 <conditional name="method"> | 443 <conditional name="method"> |
| 386 <param name="method" value="pp.subsample"/> | 444 <param name="method" value="pp.subsample"/> |
| 387 <conditional name="type"> | 445 <conditional name="type"> |
| 388 <param name="type" value="fraction" /> | 446 <param name="type" value="fraction" /> |
| 400 <has_text_matching expression="random_state=0"/> | 458 <has_text_matching expression="random_state=0"/> |
| 401 </assert_contents> | 459 </assert_contents> |
| 402 </output> | 460 </output> |
| 403 <output name="anndata_out" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5ad" compare="sim_size"/> | 461 <output name="anndata_out" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5ad" compare="sim_size"/> |
| 404 </test> | 462 </test> |
| 405 <test> | 463 <test expect_num_outputs="2"> |
| 406 <!-- test 7 --> | 464 <!-- test 8 --> |
| 407 <param name="adata" value="krumsiek11.h5ad" /> | 465 <param name="adata" value="krumsiek11.h5ad" /> |
| 408 <conditional name="method"> | 466 <conditional name="method"> |
| 409 <param name="method" value="pp.subsample"/> | 467 <param name="method" value="pp.subsample"/> |
| 410 <conditional name="type"> | 468 <conditional name="type"> |
| 411 <param name="type" value="n_obs" /> | 469 <param name="type" value="n_obs" /> |
| 423 <has_text_matching expression="random_state=0"/> | 481 <has_text_matching expression="random_state=0"/> |
| 424 </assert_contents> | 482 </assert_contents> |
| 425 </output> | 483 </output> |
| 426 <output name="anndata_out" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5ad" compare="sim_size"/> | 484 <output name="anndata_out" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5ad" compare="sim_size"/> |
| 427 </test> | 485 </test> |
| 428 <test> | 486 <test expect_num_outputs="2"> |
| 429 <!-- test 8 --> | 487 <!-- test 9 --> |
| 430 <param name="adata" value="random-randint.h5ad" /> | 488 <param name="adata" value="random-randint.h5ad" /> |
| 431 <conditional name="method"> | 489 <conditional name="method"> |
| 432 <param name="method" value="pp.downsample_counts"/> | 490 <param name="method" value="pp.downsample_counts"/> |
| 433 <param name="total_counts" value="20000"/> | 491 <param name="total_counts" value="20000"/> |
| 434 <param name="random_state" value="0"/> | 492 <param name="random_state" value="0"/> |
| 445 <has_text_matching expression="replace=False"/> | 503 <has_text_matching expression="replace=False"/> |
| 446 </assert_contents> | 504 </assert_contents> |
| 447 </output> | 505 </output> |
| 448 <output name="anndata_out" file="pp.downsample_counts.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="10000000" delta_frac="0.5"/> | 506 <output name="anndata_out" file="pp.downsample_counts.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="10000000" delta_frac="0.5"/> |
| 449 </test> | 507 </test> |
| 508 <test expect_num_outputs="3"> | |
| 509 <!-- test 10 --> | |
| 510 <param name="adata" value="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1.h5ad" /> | |
| 511 <conditional name="method"> | |
| 512 <param name="method" value="filter_marker"/> | |
| 513 <param name="markerfile" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_1.tsv"/> | |
| 514 <param name="thresh_mean" value="1.0"/> | |
| 515 <param name="thresh_frac" value="0.2"/> | |
| 516 <param name="layer_selection" value="True"/> | |
| 517 <param name="groupby" value="bulk_labels"/> | |
| 518 </conditional> | |
| 519 <section name="advanced_common"> | |
| 520 <param name="show_log" value="true" /> | |
| 521 </section> | |
| 522 <output name="hidden_output"> | |
| 523 <assert_contents> | |
| 524 <has_text_matching expression="adata, key, x, 1.0, 0.2, 'bulk_labels'"/> | |
| 525 </assert_contents> | |
| 526 </output> | |
| 527 <output name="anndata_out" file="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1.h5ad" ftype="h5ad"> | |
| 528 <assert_contents> | |
| 529 <has_h5_keys keys="obs, var, uns" /> | |
| 530 </assert_contents> | |
| 531 </output> | |
| 532 <output name="marker_out" file="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_filtered_1.tsv" ftype="tabular" compare="sim_size"/> | |
| 533 </test> | |
| 450 </tests> | 534 </tests> |
| 451 <help><![CDATA[ | 535 <help><![CDATA[ |
| 452 | 536 |
| 453 Filter cells outliers based on counts and numbers of genes expressed (`pp.filter_cells`) | 537 Filter cells outliers based on counts and numbers of genes expressed (`pp.filter_cells`) |
| 454 ======================================================================================== | 538 ======================================================================================== |
| 459 | 543 |
| 460 Only provide one of the optional parameters `min_counts`, `min_genes`, | 544 Only provide one of the optional parameters `min_counts`, `min_genes`, |
| 461 `max_counts`, `max_genes` per call. | 545 `max_counts`, `max_genes` per call. |
| 462 | 546 |
| 463 More details on the `scanpy documentation | 547 More details on the `scanpy documentation |
| 464 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.filter_cells.html>`__ | 548 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.filter_cells.html>`__ |
| 465 | 549 |
| 466 | 550 |
| 467 Filter genes based on number of cells or counts (`pp.filter_genes`) | 551 Filter genes based on number of cells or counts (`pp.filter_genes`) |
| 468 =================================================================== | 552 =================================================================== |
| 469 | 553 |
| 473 | 557 |
| 474 Only provide one of the optional parameters `min_counts`, `min_cells`, | 558 Only provide one of the optional parameters `min_counts`, `min_cells`, |
| 475 `max_counts`, `max_cells` per call. | 559 `max_counts`, `max_cells` per call. |
| 476 | 560 |
| 477 More details on the `scanpy documentation | 561 More details on the `scanpy documentation |
| 478 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.filter_genes.html>`__ | 562 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.filter_genes.html>`__ |
| 479 | 563 |
| 480 | 564 |
| 481 Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories (`tl.filter_rank_genes_groups`) | 565 Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories (`tl.filter_rank_genes_groups`) |
| 482 ========================================================================================================================================================== | 566 ========================================================================================================================================================== |
| 483 | 567 |
| 484 More details on the `scanpy documentation | 568 More details on the `scanpy documentation |
| 485 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.tl.filter_rank_genes_groups.html>`__ | 569 <https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.filter_rank_genes_groups.html>`__ |
| 486 | 570 |
| 487 | 571 |
| 488 Annotate highly variable genes (`pp.highly_variable_genes`) | 572 Annotate highly variable genes (`pp.highly_variable_genes`) |
| 489 =========================================================== | 573 =========================================================== |
| 490 | 574 |
| 495 | 579 |
| 496 Subsample to a fraction of the number of observations (`pp.subsample`) | 580 Subsample to a fraction of the number of observations (`pp.subsample`) |
| 497 ====================================================================== | 581 ====================================================================== |
| 498 | 582 |
| 499 More details on the `scanpy documentation | 583 More details on the `scanpy documentation |
| 500 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.subsample.html>`__ | 584 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.subsample.html>`__ |
| 501 | 585 |
| 502 Downsample counts (`pp.downsample_counts`) | 586 Downsample counts (`pp.downsample_counts`) |
| 503 ========================================== | 587 ========================================== |
| 504 | 588 |
| 505 Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This | 589 Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This |
| 506 has been implemented by M. D. Luecken. | 590 has been implemented by M. D. Luecken. |
| 507 | 591 |
| 592 | |
| 593 Filter marker genes (`filter_marker`) | |
| 594 ====================================================================== | |
| 595 | |
| 596 This option is specific for celltype marker gene detection. You can generate a celltype marker gene file (tsv) with **COSG** provided at Galaxy. | |
| 597 | |
| 598 The marker gene file should have celltypes as rows and marker genes as columns. Each celltype can have a varying number of marker genes. | |
| 599 | |
| 600 A marker gene is returned (retained in the list) if its mean expression across the cells of the group is greater than the mean expression threshold (thresh_mean) and if the fraction of cells in the group whose expression of the marker gene exceeds that mean is equal to or higher than the cell fraction threshold (thresh_frac). | |
| 601 | |
| 508 More details on the `scanpy documentation | 602 More details on the `scanpy documentation |
| 509 <https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.downsample_counts.html>`__ | 603 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.downsample_counts.html>`__ |
| 510 | 604 |
| 511 | 605 |
| 512 ]]></help> | 606 ]]></help> |
| 513 <expand macro="citations"/> | 607 <expand macro="citations"/> |
| 514 </tool> | 608 </tool> |
