Mercurial > repos > iuc > scanpy_filter
comparison filter.xml @ 1:deeb0203d693 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 8ef5f7c6f8728608a3f05bb51e11b642b84a05f5"
| author | iuc |
|---|---|
| date | Wed, 16 Oct 2019 06:25:49 -0400 |
| parents | 7edb8980267d |
| children | 21de91f71706 |
comparison
equal
deleted
inserted
replaced
| 0:7edb8980267d | 1:deeb0203d693 |
|---|---|
| 1 <tool id="scanpy_filter" name="Filter with scanpy" version="@galaxy_version@"> | 1 <tool id="scanpy_filter" name="Filter" version="@galaxy_version@"> |
| 2 <description></description> | 2 <description>with scanpy</description> |
| 3 <macros> | 3 <macros> |
| 4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
| 5 </macros> | 5 </macros> |
| 6 <expand macro="requirements"/> | 6 <expand macro="requirements"/> |
| 7 <expand macro="version_command"/> | 7 <expand macro="version_command"/> |
| 12 <configfile name="script_file"><![CDATA[ | 12 <configfile name="script_file"><![CDATA[ |
| 13 @CMD_imports@ | 13 @CMD_imports@ |
| 14 @CMD_read_inputs@ | 14 @CMD_read_inputs@ |
| 15 | 15 |
| 16 #if $method.method == 'pp.filter_cells' | 16 #if $method.method == 'pp.filter_cells' |
| 17 res = sc.pp.filter_cells( | 17 sc.pp.filter_cells( |
| 18 #if $modify_anndata.modify_anndata == 'true' | |
| 19 adata, | 18 adata, |
| 20 #else | |
| 21 adata.X, | |
| 22 #end if | |
| 23 #if $method.filter.filter == 'min_counts' | 19 #if $method.filter.filter == 'min_counts' |
| 24 min_counts=$method.filter.min_counts, | 20 min_counts=$method.filter.min_counts, |
| 25 #elif $method.filter.filter == 'max_counts' | 21 #else if $method.filter.filter == 'max_counts' |
| 26 max_counts=$method.filter.max_counts, | 22 max_counts=$method.filter.max_counts, |
| 27 #elif $method.filter.filter == 'min_genes' | 23 #else if $method.filter.filter == 'min_genes' |
| 28 min_genes=$method.filter.min_genes, | 24 min_genes=$method.filter.min_genes, |
| 29 #elif $method.filter.filter == 'max_genes' | 25 #else if $method.filter.filter == 'max_genes' |
| 30 max_genes=$method.filter.max_genes, | 26 max_genes=$method.filter.max_genes, |
| 31 #end if | 27 #end if |
| 32 copy=False) | 28 copy=False) |
| 33 | 29 |
| 34 #if $modify_anndata.modify_anndata == 'true' | 30 #else if $method.method == 'pp.filter_genes' |
| 35 df = adata.obs | 31 sc.pp.filter_genes( |
| 36 #else | |
| 37 df = pd.DataFrame(data=dict(cell_subset=res[0], number_per_cell=res[1])) | |
| 38 #end if | |
| 39 | |
| 40 #if $method.filter.filter == 'min_counts' or $method.filter.filter == 'max_counts' | |
| 41 df.to_csv('$counts_per_cell', sep='\t') | |
| 42 #elif $method.filter.filter == 'min_genes' or $method.filter.filter == 'max_genes' | |
| 43 df.to_csv('$genes_per_cell', sep='\t') | |
| 44 #end if | |
| 45 | |
| 46 #elif $method.method == 'pp.filter_genes' | |
| 47 res = sc.pp.filter_genes( | |
| 48 #if $modify_anndata.modify_anndata == 'true' | |
| 49 adata, | 32 adata, |
| 50 #else | |
| 51 adata.X, | |
| 52 #end if | |
| 53 #if $method.filter.filter == 'min_counts' | 33 #if $method.filter.filter == 'min_counts' |
| 54 min_counts=$method.filter.min_counts, | 34 min_counts=$method.filter.min_counts, |
| 55 #elif $method.filter.filter == 'max_counts' | 35 #else if $method.filter.filter == 'max_counts' |
| 56 max_counts=$method.filter.max_counts, | 36 max_counts=$method.filter.max_counts, |
| 57 #elif $method.filter.filter == 'min_cells' | 37 #else if $method.filter.filter == 'min_cells' |
| 58 min_cells=$method.filter.min_cells, | 38 min_cells=$method.filter.min_cells, |
| 59 #elif $method.filter.filter == 'max_cells' | 39 #else if $method.filter.filter == 'max_cells' |
| 60 max_cells=$method.filter.max_cells, | 40 max_cells=$method.filter.max_cells, |
| 61 #end if | 41 #end if |
| 62 copy=False) | 42 copy=False) |
| 63 | 43 |
| 64 #if $modify_anndata.modify_anndata == 'true' | 44 #else if $method.method == 'tl.filter_rank_genes_groups' |
| 65 df = adata.var | 45 sc.tl.filter_rank_genes_groups( |
| 66 #else | |
| 67 df = pd.DataFrame(data=dict(gene_subset=res[0], number_per_gene=res[1])) | |
| 68 #end if | |
| 69 | |
| 70 #if $method.filter.filter == 'min_counts' or $method.filter.filter == 'max_counts' | |
| 71 df.to_csv('$counts_per_gene', sep='\t') | |
| 72 #elif $method.filter.filter == 'min_cells' or $method.filter.filter == 'max_cells' | |
| 73 df.to_csv('$cells_per_gene', sep='\t') | |
| 74 #end if | |
| 75 | |
| 76 #elif $method.method == 'pp.filter_genes_dispersion' | |
| 77 res = sc.pp.filter_genes_dispersion( | |
| 78 #if $modify_anndata.modify_anndata == 'true' | |
| 79 adata, | 46 adata, |
| 80 #else | 47 #if str($method.key) != '' |
| 81 adata.X, | 48 key='$method.key', |
| 82 #end if | 49 #end if |
| 50 #if str($method.groupby) != '' | |
| 51 groupby='$method.groupby', | |
| 52 #end if | |
| 53 use_raw=$method.use_raw, | |
| 54 log=$method.log, | |
| 55 key_added='$method.key_added', | |
| 56 min_in_group_fraction=$method.min_in_group_fraction, | |
| 57 max_out_group_fraction=$method.max_out_group_fraction, | |
| 58 min_fold_change=$method.min_fold_change) | |
| 59 | |
| 60 #else if $method.method == "pp.highly_variable_genes" | |
| 61 sc.pp.highly_variable_genes( | |
| 62 adata=adata, | |
| 83 flavor='$method.flavor.flavor', | 63 flavor='$method.flavor.flavor', |
| 84 #if $method.flavor.flavor=='seurat' | 64 #if $method.flavor.flavor == 'seurat' |
| 65 #if str($method.flavor.min_mean) != '' | |
| 85 min_mean=$method.flavor.min_mean, | 66 min_mean=$method.flavor.min_mean, |
| 67 #end if | |
| 68 #if str($method.flavor.max_mean) != '' | |
| 86 max_mean=$method.flavor.max_mean, | 69 max_mean=$method.flavor.max_mean, |
| 70 #end if | |
| 71 #if str($method.flavor.min_disp) != '' | |
| 87 min_disp=$method.flavor.min_disp, | 72 min_disp=$method.flavor.min_disp, |
| 88 #if $method.flavor.max_disp | 73 #end if |
| 74 #if str($method.flavor.max_disp) != '' | |
| 89 max_disp=$method.flavor.max_disp, | 75 max_disp=$method.flavor.max_disp, |
| 90 #end if | 76 #end if |
| 91 #else | 77 #else if $method.flavor.flavor == 'cell_ranger' |
| 92 n_top_genes=$method.flavor.n_top_genes, | 78 n_top_genes=$method.flavor.n_top_genes, |
| 93 #end if | 79 #end if |
| 94 n_bins=$method.n_bins, | 80 n_bins=$method.n_bins, |
| 95 log=$method.log, | 81 subset=$method.subset, |
| 96 copy=False) | 82 inplace=True) |
| 97 | 83 |
| 98 #if $modify_anndata.modify_anndata == 'true' | 84 #else if $method.method == 'pp.subsample' |
| 99 adata.var.to_csv('$per_gene', sep='\t') | |
| 100 #else | |
| 101 pd.DataFrame(res).to_csv('$per_gene', sep='\t') | |
| 102 #end if | |
| 103 | |
| 104 #elif $method.method == 'pp.subsample' | |
| 105 sc.pp.subsample( | 85 sc.pp.subsample( |
| 106 data=adata, | 86 data=adata, |
| 107 #if $method.type.type == 'fraction' | 87 #if $method.type.type == 'fraction' |
| 108 fraction=$method.type.fraction, | 88 fraction=$method.type.fraction, |
| 109 #else if $method.type.type == 'n_obs' | 89 #else if $method.type.type == 'n_obs' |
| 110 n_obs=$method.type.n_obs, | 90 n_obs=$method.type.n_obs, |
| 111 #end if | 91 #end if |
| 112 random_state=$method.random_state, | 92 random_state=$method.random_state, |
| 113 copy=False) | 93 copy=False) |
| 114 | 94 |
| 95 #else if $method.method == "pp.downsample_counts" | |
| 96 sc.pp.downsample_counts( | |
| 97 adata=adata, | |
| 98 #if str($method.counts_per_cell) != '' | |
| 99 counts_per_cell=$method.counts_per_cell, | |
| 100 #end if | |
| 101 #if str($method.total_counts) != '' | |
| 102 total_counts=$method.total_counts, | |
| 103 #end if | |
| 104 random_state=$method.random_state, | |
| 105 replace=$method.replace, | |
| 106 copy=False) | |
| 115 #end if | 107 #end if |
| 116 | 108 |
| 117 @CMD_anndata_write_modify_outputs@ | 109 @CMD_anndata_write_outputs@ |
| 118 ]]></configfile> | 110 ]]></configfile> |
| 119 </configfiles> | 111 </configfiles> |
| 120 <inputs> | 112 <inputs> |
| 121 <expand macro="inputs_anndata"/> | 113 <expand macro="inputs_anndata"/> |
| 122 <conditional name="method"> | 114 <conditional name="method"> |
| 123 <param argument="method" type="select" label="Method used for filtering"> | 115 <param argument="method" type="select" label="Method used for filtering"> |
| 124 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using `pp.filter_cells`</option> | 116 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using `pp.filter_cells`</option> |
| 125 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using `pp.filter_genes`</option> | 117 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using `pp.filter_genes`</option> |
| 126 <option value="pp.filter_genes_dispersion">Extract highly variable genes, using `pp.filter_genes_dispersion`</option> | 118 <option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of cells expressing the gene within and outside the groupby categories, using `tl.filter_rank_genes_groups`</option> |
| 127 <!--<option value="pp.highly_variable_genes">, using `tl.highly_variable_genes`</option>!--> | 119 <option value="pp.highly_variable_genes">Annotate highly variable genes (and optionally subset to them), using `pp.highly_variable_genes`</option> |
| 128 <option value="pp.subsample">Subsample to a fraction of the number of observations, using `pp.subsample`</option> | 120 <option value="pp.subsample">Subsample to a fraction of the number of observations, using `pp.subsample`</option> |
| 129 <!--<option value="queries.gene_coordinates">, using `queries.gene_coordinates`</option>!--> | 121 <option value="pp.downsample_counts">Downsample counts from count matrix, using `pp.downsample_counts`</option> |
| 130 <!--<option value="queries.mitochondrial_genes">, using `queries.mitochondrial_genes`</option>!--> | |
| 131 </param> | 122 </param> |
| 132 <when value="pp.filter_cells"> | 123 <when value="pp.filter_cells"> |
| 133 <conditional name="filter"> | 124 <conditional name="filter"> |
| 134 <param argument="filter" type="select" label="Filter"> | 125 <param argument="filter" type="select" label="Filter"> |
| 135 <option value="min_counts">Minimum number of counts</option> | 126 <option value="min_counts">Minimum number of counts</option> |
| 158 <option value="max_counts">Maximum number of counts</option> | 149 <option value="max_counts">Maximum number of counts</option> |
| 159 <option value="min_cells">Minimum number of cells expressed</option> | 150 <option value="min_cells">Minimum number of cells expressed</option> |
| 160 <option value="max_cells">Maximum number of cells expressed</option> | 151 <option value="max_cells">Maximum number of cells expressed</option> |
| 161 </param> | 152 </param> |
| 162 <when value="min_counts"> | 153 <when value="min_counts"> |
| 163 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering" help=""/> | 154 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering"/> |
| 164 </when> | 155 </when> |
| 165 <when value="max_counts"> | 156 <when value="max_counts"> |
| 166 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering" help=""/> | 157 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering"/> |
| 167 </when> | 158 </when> |
| 168 <when value="min_cells"> | 159 <when value="min_cells"> |
| 169 <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering" help=""/> | 160 <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering"/> |
| 170 </when> | 161 </when> |
| 171 <when value="max_cells"> | 162 <when value="max_cells"> |
| 172 <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering" help=""/> | 163 <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering"/> |
| 173 </when> | 164 </when> |
| 174 </conditional> | 165 </conditional> |
| 175 </when> | 166 </when> |
| 176 <when value="pp.filter_genes_dispersion"> | 167 <when value="tl.filter_rank_genes_groups"> |
| 168 <param argument="key" type="text" optional="true" label="Key in adata.uns where the rank_genes_groups output is stored"/> | |
| 169 <param argument="groupby" type="text" optional="true" label="The key of the observations grouping to consider"/> | |
| 170 <expand macro="param_use_raw"/> | |
| 171 <expand macro="param_log"/> | |
| 172 <param argument="key_added" type="text" value="rank_genes_groups_filtered" label="Key that will contain new values"/> | |
| 173 <param argument="min_in_group_fraction" type="float" min="0" max="1" value="0.25" label="Minimum fraction of cells expressing the gene within the categories"/> | |
| 174 <param argument="max_out_group_fraction" type="float" min="0" max="1" value="0.5" label="Maximum fraction of cells expressing the gene outside the categories"/> | |
| 175 <param argument="min_fold_change" type="integer" value="2" label="Minimum fold change"/> | |
| 176 </when> | |
| 177 <when value="pp.highly_variable_genes"> | |
| 177 <conditional name='flavor'> | 178 <conditional name='flavor'> |
| 178 <param argument="flavor" type="select" label="Flavor for computing normalized dispersion" help=""> | 179 <param argument="flavor" type="select" label="Flavor for computing normalized dispersion"> |
| 179 <option value="seurat">seurat: expects non-logarithmized data</option> | 180 <option value="seurat">seurat: expects non-logarithmized data</option> |
| 180 <option value="cell_ranger">cell_ranger: usually called for logarithmized data</option> | 181 <option value="cell_ranger">cell_ranger: usually called for logarithmized data</option> |
| 181 </param> | 182 </param> |
| 182 <when value="seurat"> | 183 <when value="seurat"> |
| 183 <param argument="min_mean" type="float" value="0.0125" label="Minimal mean cutoff" help=""/> | 184 <param argument="min_mean" type="float" value="0.0125" label="Minimal mean cutoff"/> |
| 184 <param argument="max_mean" type="float" value="3" label="Maximal mean cutoff" help=""/> | 185 <param argument="max_mean" type="float" value="3" label="Maximal mean cutoff"/> |
| 185 <param argument="min_disp" type="float" value="0.5" label="Minimal normalized dispersion cutoff" help=""/> | 186 <param argument="min_disp" type="float" value="0.5" label="Minimal normalized dispersion cutoff"/> |
| 186 <param argument="max_disp" type="float" value="" optional="true" label="Maximal normalized dispersion cutoff" help=""/> | 187 <param argument="max_disp" type="float" value="" optional="true" label="Maximal normalized dispersion cutoff"/> |
| 187 </when> | 188 </when> |
| 188 <when value="cell_ranger"> | 189 <when value="cell_ranger"> |
| 189 <param argument="n_top_genes" type="integer" value="" label="Number of highly-variable genes to keep" help=""/> | 190 <param argument="n_top_genes" type="integer" value="" label="Number of highly-variable genes to keep"/> |
| 190 </when> | 191 </when> |
| 191 </conditional> | 192 </conditional> |
| 192 <param argument="n_bins" type="integer" value="20" label="Number of bins for binning the mean gene expression" help="Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1"/> | 193 <param argument="n_bins" type="integer" value="20" label="Number of bins for binning the mean gene expression" help="Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1"/> |
| 193 <expand macro="param_log"/> | 194 <param argument="subset" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Inplace subset to highly-variable genes?" help="Otherwise it merely indicates highly variable genes."/> |
| 194 </when> | 195 </when> |
| 195 <when value="pp.subsample"> | 196 <when value="pp.subsample"> |
| 196 <conditional name="type"> | 197 <conditional name="type"> |
| 197 <param name="type" type="select" label="Type of subsampling"> | 198 <param name="type" type="select" label="Type of subsampling"> |
| 198 <option value="fraction">By fraction</option> | 199 <option value="fraction">By fraction</option> |
| 199 <option value="n_obs">By number of observations</option> | 200 <option value="n_obs">By number of observations</option> |
| 200 </param> | 201 </param> |
| 201 <when value="fraction"> | 202 <when value="fraction"> |
| 202 <param argument="fraction" type="float" value="" label="Subsample to this `fraction` of the number of observations" help=""/> | 203 <param argument="fraction" type="float" value="" label="Subsample to this `fraction` of the number of observations"/> |
| 203 </when> | 204 </when> |
| 204 <when value="n_obs"> | 205 <when value="n_obs"> |
| 205 <param argument="n_obs" type="integer" min="0" value="" label="Subsample to this number of observations" help=""/> | 206 <param argument="n_obs" type="integer" min="0" value="" label="Subsample to this number of observations"/> |
| 206 </when> | 207 </when> |
| 207 </conditional> | 208 </conditional> |
| 208 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling" help=""/> | 209 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/> |
| 210 </when> | |
| 211 <when value="pp.downsample_counts"> | |
| 212 <param argument="counts_per_cell" type="integer" min="0" optional="true" label="Target total counts per cell" help="If a cell has more than ‘counts_per_cell’, it will be downsampled to this number. Resulting counts can be specified on a per cell basis by passing an array."/> | |
| 213 <param argument="total_counts" type="integer" min="0" optional="true" label="Target total counts" help="If the count matrix has more than total_counts it will be downsampled to have this number."/> | |
| 214 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/> | |
| 215 <param argument="replace" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Sample the counts with replacement?"/> | |
| 209 </when> | 216 </when> |
| 210 </conditional> | 217 </conditional> |
| 211 <expand macro="anndata_modify_output_input"/> | |
| 212 </inputs> | 218 </inputs> |
| 213 <outputs> | 219 <outputs> |
| 214 <expand macro="anndata_modify_outputs"/> | 220 <expand macro="anndata_outputs"/> |
| 215 <!-- for pp.filter_cells --> | |
| 216 <data name="counts_per_cell" format="tabular" label="${tool.name} on ${on_string}: Counts per cells after filtering"> | |
| 217 <filter>method['method'] == 'pp.filter_cells' and (method['filter']['filter'] == 'min_counts' or method['filter']['filter'] == 'max_counts')</filter> | |
| 218 </data> | |
| 219 <data name="genes_per_cell" format="tabular" label="${tool.name} on ${on_string}: Number of genes per cell after filtering"> | |
| 220 <filter>method['method'] == 'pp.filter_cells' and (method['filter']['filter'] == 'min_genes' or method['filter']['filter'] == 'max_genes')</filter> | |
| 221 </data> | |
| 222 <!-- for pp.filter_genes --> | |
| 223 <data name="counts_per_gene" format="tabular" label="${tool.name} on ${on_string}: Counts per genes after filtering"> | |
| 224 <filter>method['method'] == 'pp.filter_genes' and (method['filter']['filter'] == 'min_counts' or method['filter']['filter'] == 'max_counts')</filter> | |
| 225 </data> | |
| 226 <data name="cells_per_gene" format="tabular" label="${tool.name} on ${on_string}: Number of cells per genes after filtering"> | |
| 227 <filter>method['method'] == 'pp.filter_genes' and (method['filter']['filter'] == 'min_cells' or method['filter']['filter'] == 'max_cells')</filter> | |
| 228 </data> | |
| 229 <!-- for pp.filter_genes_dispersion --> | |
| 230 <data name="per_gene" format="tabular" label="${tool.name} on ${on_string}: Means, dispersions and normalized dispersions per gene"> | |
| 231 <filter>method['method'] == 'pp.filter_genes_dispersion'</filter> | |
| 232 </data> | |
| 233 </outputs> | 221 </outputs> |
| 234 <tests> | 222 <tests> |
| 235 <test expect_num_outputs="2"> | 223 <test> |
| 236 <conditional name="input"> | 224 <!-- test 1 --> |
| 237 <param name="format" value="h5ad" /> | 225 <param name="adata" value="krumsiek11.h5ad" /> |
| 238 <param name="adata" value="krumsiek11.h5ad" /> | |
| 239 </conditional> | |
| 240 <conditional name="method"> | 226 <conditional name="method"> |
| 241 <param name="method" value="pp.filter_cells"/> | 227 <param name="method" value="pp.filter_cells"/> |
| 242 <conditional name="filter"> | 228 <conditional name="filter"> |
| 243 <param name="filter" value="min_counts"/> | 229 <param name="filter" value="min_counts"/> |
| 244 <param name="min_counts" value="3"/> | 230 <param name="min_counts" value="3"/> |
| 245 </conditional> | 231 </conditional> |
| 246 </conditional> | 232 </conditional> |
| 247 <conditional name="modify_anndata"> | |
| 248 <param name="modify_anndata" value="true"/> | |
| 249 <param name="anndata_output_format" value="h5ad" /> | |
| 250 </conditional> | |
| 251 <assert_stdout> | 233 <assert_stdout> |
| 252 <has_text_matching expression="sc.pp.filter_cells"/> | 234 <has_text_matching expression="sc.pp.filter_cells"/> |
| 253 <has_text_matching expression="min_counts=3"/> | 235 <has_text_matching expression="min_counts=3"/> |
| 254 </assert_stdout> | 236 </assert_stdout> |
| 255 <output name="anndata_out_h5ad" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5" compare="sim_size"/> | 237 <output name="anndata_out" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/> |
| 256 <output name="counts_per_cell"> | 238 </test> |
| 257 <assert_contents> | 239 <test> |
| 258 <has_text_matching expression="cell_type\tn_counts" /> | 240 <!-- test 2 --> |
| 259 <has_text_matching expression="46\tprogenitor\t3.028" /> | 241 <param name="adata" value="krumsiek11.h5ad" /> |
| 260 <has_text_matching expression="85\tEry\t3.7001" /> | |
| 261 <has_text_matching expression="150\tMk\t4.095" /> | |
| 262 <has_n_columns n="3" /> | |
| 263 </assert_contents> | |
| 264 </output> | |
| 265 </test> | |
| 266 <test expect_num_outputs="2"> | |
| 267 <conditional name="input"> | |
| 268 <param name="format" value="loom" /> | |
| 269 <param name="adata" value="krumsiek11.loom" /> | |
| 270 <param name="sparse" value="True"/> | |
| 271 <param name="cleanup" value="False"/> | |
| 272 <param name="x_name" value="spliced"/> | |
| 273 <param name="obs_names" value="CellID" /> | |
| 274 <param name="var_names" value="Gene"/> | |
| 275 </conditional> | |
| 276 <conditional name="method"> | |
| 277 <param name="method" value="pp.filter_cells"/> | |
| 278 <conditional name="filter"> | |
| 279 <param name="filter" value="min_counts"/> | |
| 280 <param name="min_counts" value="3"/> | |
| 281 </conditional> | |
| 282 </conditional> | |
| 283 <conditional name="modify_anndata"> | |
| 284 <param name="modify_anndata" value="true"/> | |
| 285 <param name="anndata_output_format" value="loom" /> | |
| 286 </conditional> | |
| 287 <assert_stdout> | |
| 288 <has_text_matching expression="sc.pp.filter_cells"/> | |
| 289 <has_text_matching expression="min_counts=3"/> | |
| 290 </assert_stdout> | |
| 291 <output name="anndata_out_loom" file="pp.filter_cells.krumsiek11-min_counts.loom" ftype="loom" compare="sim_size"/> | |
| 292 <output name="counts_per_cell"> | |
| 293 <assert_contents> | |
| 294 <has_text_matching expression="cell_type\tn_counts" /> | |
| 295 <has_text_matching expression="46\tprogenitor\t3.028" /> | |
| 296 <has_text_matching expression="85\tEry\t3.7001" /> | |
| 297 <has_text_matching expression="97\tMo\t3.925" /> | |
| 298 <has_text_matching expression="150\tMk\t4.095" /> | |
| 299 <has_n_columns n="3" /> | |
| 300 </assert_contents> | |
| 301 </output> | |
| 302 </test> | |
| 303 <test expect_num_outputs="1"> | |
| 304 <conditional name="input"> | |
| 305 <param name="format" value="h5ad" /> | |
| 306 <param name="adata" value="krumsiek11.h5ad"/> | |
| 307 </conditional> | |
| 308 <conditional name="method"> | 242 <conditional name="method"> |
| 309 <param name="method" value="pp.filter_cells"/> | 243 <param name="method" value="pp.filter_cells"/> |
| 310 <conditional name="filter"> | 244 <conditional name="filter"> |
| 311 <param name="filter" value="max_genes"/> | 245 <param name="filter" value="max_genes"/> |
| 312 <param name="max_genes" value="100"/> | 246 <param name="max_genes" value="100"/> |
| 313 </conditional> | 247 </conditional> |
| 314 </conditional> | 248 </conditional> |
| 315 <conditional name="modify_anndata"> | |
| 316 <param name="modify_anndata" value="false"/> | |
| 317 </conditional> | |
| 318 <assert_stdout> | 249 <assert_stdout> |
| 319 <has_text_matching expression="sc.pp.filter_cells"/> | 250 <has_text_matching expression="sc.pp.filter_cells"/> |
| 320 <has_text_matching expression="adata.X"/> | 251 <has_text_matching expression="adata"/> |
| 321 <has_text_matching expression="max_genes=100"/> | 252 <has_text_matching expression="max_genes=100"/> |
| 322 </assert_stdout> | 253 </assert_stdout> |
| 323 <output name="genes_per_cell" file="pp.filter_cells.number_per_cell.krumsiek11-max_genes.tabular"/> | 254 <output name="anndata_out" file="pp.filter_cells.krumsiek11-max_genes.h5ad" ftype="h5ad" compare="sim_size"/> |
| 324 </test> | 255 </test> |
| 325 <test expect_num_outputs="2"> | 256 <test> |
| 326 <conditional name="input"> | 257 <!-- test 3 --> |
| 327 <param name="format" value="h5ad" /> | 258 <param name="adata" value="krumsiek11.h5ad" /> |
| 328 <param name="adata" value="krumsiek11.h5ad" /> | |
| 329 </conditional> | |
| 330 <conditional name="method"> | 259 <conditional name="method"> |
| 331 <param name="method" value="pp.filter_genes"/> | 260 <param name="method" value="pp.filter_genes"/> |
| 332 <conditional name="filter"> | 261 <conditional name="filter"> |
| 333 <param name="filter" value="min_counts"/> | 262 <param name="filter" value="min_counts"/> |
| 334 <param name="min_counts" value="3"/> | 263 <param name="min_counts" value="3"/> |
| 335 </conditional> | 264 </conditional> |
| 336 </conditional> | 265 </conditional> |
| 337 <conditional name="modify_anndata"> | |
| 338 <param name="modify_anndata" value="true"/> | |
| 339 <param name="anndata_output_format" value="h5ad" /> | |
| 340 </conditional> | |
| 341 <assert_stdout> | 266 <assert_stdout> |
| 342 <has_text_matching expression="sc.pp.filter_genes"/> | 267 <has_text_matching expression="sc.pp.filter_genes"/> |
| 343 <has_text_matching expression="min_counts=3"/> | 268 <has_text_matching expression="min_counts=3"/> |
| 344 </assert_stdout> | 269 </assert_stdout> |
| 345 <output name="anndata_out_h5ad" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5" compare="sim_size"/> | 270 <output name="anndata_out" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/> |
| 346 <output name="counts_per_gene" file="pp.filter_genes.number_per_gene.krumsiek11-min_counts.tabular"/> | 271 </test> |
| 347 </test> | 272 <test> |
| 348 <test expect_num_outputs="1"> | 273 <!-- test 4 --> |
| 349 <conditional name="input"> | 274 <param name="adata" value="tl.rank_genes_groups.krumsiek11.h5ad" /> |
| 350 <param name="format" value="h5ad" /> | 275 <conditional name="method"> |
| 351 <param name="adata" value="pbmc68k_reduced.h5ad"/> | 276 <param name="method" value="tl.filter_rank_genes_groups"/> |
| 352 </conditional> | 277 <param name="key" value="rank_genes_groups"/> |
| 353 <conditional name="method"> | 278 <param name="use_raw" value="False"/> |
| 354 <param name="method" value="pp.filter_genes"/> | 279 <param name="log" value="False"/> |
| 355 <conditional name="filter"> | 280 <param name="key_added" value="rank_genes_groups_filtered"/> |
| 356 <param name="filter" value="max_cells"/> | 281 <param name="min_in_group_fraction" value="0.25"/> |
| 357 <param name="max_cells" value="500"/> | 282 <param name="max_out_group_fraction" value="0.5"/> |
| 358 </conditional> | 283 <param name="min_fold_change" value="3"/> |
| 359 </conditional> | 284 </conditional> |
| 360 <conditional name="modify_anndata"> | 285 <assert_stdout> |
| 361 <param name="modify_anndata" value="false"/> | 286 <has_text_matching expression="tl.filter_rank_genes_groups"/> |
| 362 </conditional> | 287 <has_text_matching expression="key='rank_genes_groups'"/> |
| 363 <assert_stdout> | 288 <has_text_matching expression="use_raw=False"/> |
| 364 <has_text_matching expression="sc.pp.filter_genes"/> | 289 <has_text_matching expression="log=False"/> |
| 365 <has_text_matching expression="adata.X"/> | 290 <has_text_matching expression="key_added='rank_genes_groups_filtered'"/> |
| 366 <has_text_matching expression="max_cells=500"/> | 291 <has_text_matching expression="min_in_group_fraction=0.25"/> |
| 367 </assert_stdout> | 292 <has_text_matching expression="max_out_group_fraction=0.5"/> |
| 368 <output name="cells_per_gene" file="pp.filter_genes.number_per_gene.pbmc68k_reduced-max_cells.tabular"/> | 293 <has_text_matching expression="min_fold_change=3"/> |
| 369 </test> | 294 </assert_stdout> |
| 370 <test expect_num_outputs="2"> | 295 <output name="anndata_out" file="pp.filter_rank_genes_groups.h5ad" ftype="h5ad" compare="sim_size"/> |
| 371 <conditional name="input"> | 296 </test> |
| 372 <param name="format" value="h5ad" /> | 297 <test> |
| 373 <param name="adata" value="krumsiek11.h5ad" /> | 298 <!-- test 5 --> |
| 374 </conditional> | 299 <param name="adata" value="blobs.h5ad"/> |
| 375 <conditional name="method"> | 300 <conditional name="method"> |
| 376 <param name="method" value="pp.filter_genes_dispersion"/> | 301 <param name="method" value="pp.highly_variable_genes"/> |
| 377 <conditional name="flavor"> | 302 <conditional name="flavor"> |
| 378 <param name="flavor" value="seurat"/> | 303 <param name="flavor" value="seurat"/> |
| 379 <param name="min_mean" value="0.0125"/> | 304 <param name="min_mean" value="0.0125"/> |
| 380 <param name="max_mean" value="3"/> | 305 <param name="max_mean" value="3"/> |
| 381 <param name="min_disp" value="0.5"/> | 306 <param name="min_disp" value="0.5"/> |
| 382 </conditional> | 307 </conditional> |
| 383 <param name="n_bins" value="20" /> | 308 <param name="n_bins" value="20"/> |
| 384 <param name="log" value="true"/> | 309 <param name="subset" value="false"/> |
| 385 </conditional> | 310 </conditional> |
| 386 <conditional name="modify_anndata"> | 311 <assert_stdout> |
| 387 <param name="modify_anndata" value="true"/> | 312 <has_text_matching expression="sc.pp.highly_variable_genes"/> |
| 388 <param name="anndata_output_format" value="h5ad" /> | |
| 389 </conditional> | |
| 390 <assert_stdout> | |
| 391 <has_text_matching expression="sc.pp.filter_genes_dispersion"/> | |
| 392 <has_text_matching expression="flavor='seurat'"/> | 313 <has_text_matching expression="flavor='seurat'"/> |
| 393 <has_text_matching expression="min_mean=0.0125"/> | 314 <has_text_matching expression="min_mean=0.0125"/> |
| 394 <has_text_matching expression="max_mean=3.0"/> | 315 <has_text_matching expression="max_mean=3"/> |
| 395 <has_text_matching expression="min_disp=0.5"/> | 316 <has_text_matching expression="min_disp=0.5"/> |
| 396 <has_text_matching expression="n_bins=20"/> | 317 <has_text_matching expression="n_bins=20"/> |
| 397 <has_text_matching expression="log=True"/> | 318 <has_text_matching expression="subset=False"/> |
| 398 </assert_stdout> | 319 </assert_stdout> |
| 399 <output name="anndata_out_h5ad" file="pp.filter_genes_dispersion.krumsiek11-seurat.h5ad" ftype="h5" compare="sim_size"/> | 320 <output name="anndata_out" file="pp.highly_variable_genes.seurat.blobs.h5ad" ftype="h5ad" compare="sim_size"/> |
| 400 <output name="per_gene" file="pp.filter_genes_dispersion.per_gene.krumsiek11-seurat.tabular"/> | 321 </test> |
| 401 </test> | 322 <test> |
| 402 <test expect_num_outputs="1"> | 323 <!-- test 6 --> |
| 403 <conditional name="input"> | 324 <param name="adata" value="krumsiek11.h5ad" /> |
| 404 <param name="format" value="h5ad" /> | 325 <conditional name="method"> |
| 405 <param name="adata" value="krumsiek11.h5ad" /> | 326 <param name="method" value="pp.highly_variable_genes"/> |
| 406 </conditional> | |
| 407 <conditional name="method"> | |
| 408 <param name="method" value="pp.filter_genes_dispersion"/> | |
| 409 <conditional name="flavor"> | 327 <conditional name="flavor"> |
| 410 <param name="flavor" value="cell_ranger"/> | 328 <param name="flavor" value="cell_ranger"/> |
| 411 <param name="n_top_genes" value="2"/> | 329 <param name="n_top_genes" value="2"/> |
| 412 </conditional> | 330 </conditional> |
| 413 <param name="n_bins" value="20"/> | 331 <param name="n_bins" value="20"/> |
| 414 <param name="log" value="true"/> | 332 </conditional> |
| 415 </conditional> | 333 <assert_stdout> |
| 416 <conditional name="modify_anndata"> | 334 <has_text_matching expression="sc.pp.highly_variable_genes"/> |
| 417 <param name="modify_anndata" value="false"/> | |
| 418 </conditional> | |
| 419 <assert_stdout> | |
| 420 <has_text_matching expression="sc.pp.filter_genes_dispersion"/> | |
| 421 <has_text_matching expression="flavor='cell_ranger'"/> | 335 <has_text_matching expression="flavor='cell_ranger'"/> |
| 422 <has_text_matching expression="n_top_genes=2"/> | 336 <has_text_matching expression="n_top_genes=2"/> |
| 423 <has_text_matching expression="n_bins=20"/> | 337 <has_text_matching expression="n_bins=20"/> |
| 424 <has_text_matching expression="og=True"/> | 338 </assert_stdout> |
| 425 </assert_stdout> | 339 <output name="anndata_out" file="pp.highly_variable_genes.krumsiek11-cell_ranger.h5ad" ftype="h5ad" compare="sim_size"/> |
| 426 <output name="per_gene" file="pp.filter_genes_dispersion.per_gene.krumsiek11-cell_ranger.tabular"/> | 340 </test> |
| 427 </test> | 341 <test> |
| 428 <test expect_num_outputs="1"> | 342 <!-- test 7 --> |
| 429 <conditional name="input"> | 343 <param name="adata" value="krumsiek11.h5ad" /> |
| 430 <param name="format" value="h5ad" /> | |
| 431 <param name="adata" value="krumsiek11.h5ad" /> | |
| 432 </conditional> | |
| 433 <conditional name="method"> | 344 <conditional name="method"> |
| 434 <param name="method" value="pp.subsample"/> | 345 <param name="method" value="pp.subsample"/> |
| 435 <conditional name="type"> | 346 <conditional name="type"> |
| 436 <param name="type" value="fraction" /> | 347 <param name="type" value="fraction" /> |
| 437 <param name="fraction" value="0.5"/> | 348 <param name="fraction" value="0.5"/> |
| 438 </conditional> | 349 </conditional> |
| 439 <param name="random_state" value="0"/> | 350 <param name="random_state" value="0"/> |
| 440 </conditional> | 351 </conditional> |
| 441 <conditional name="modify_anndata"> | |
| 442 <param name="modify_anndata" value="true"/> | |
| 443 <param name="anndata_output_format" value="h5ad" /> | |
| 444 </conditional> | |
| 445 <assert_stdout> | 352 <assert_stdout> |
| 446 <has_text_matching expression="sc.pp.subsample"/> | 353 <has_text_matching expression="sc.pp.subsample"/> |
| 447 <has_text_matching expression="fraction=0.5"/> | 354 <has_text_matching expression="fraction=0.5"/> |
| 448 <has_text_matching expression="random_state=0"/> | 355 <has_text_matching expression="random_state=0"/> |
| 449 </assert_stdout> | 356 </assert_stdout> |
| 450 <output name="anndata_out_h5ad" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5" compare="sim_size"/> | 357 <output name="anndata_out" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5ad" compare="sim_size"/> |
| 451 </test> | 358 </test> |
| 452 <test expect_num_outputs="1"> | 359 <test> |
| 453 <conditional name="input"> | 360 <!-- test 8 --> |
| 454 <param name="format" value="h5ad" /> | 361 <param name="adata" value="krumsiek11.h5ad" /> |
| 455 <param name="adata" value="krumsiek11.h5ad" /> | |
| 456 </conditional> | |
| 457 <conditional name="method"> | 362 <conditional name="method"> |
| 458 <param name="method" value="pp.subsample"/> | 363 <param name="method" value="pp.subsample"/> |
| 459 <conditional name="type"> | 364 <conditional name="type"> |
| 460 <param name="type" value="n_obs" /> | 365 <param name="type" value="n_obs" /> |
| 461 <param name="n_obs" value="10"/> | 366 <param name="n_obs" value="10"/> |
| 462 </conditional> | 367 </conditional> |
| 463 <param name="random_state" value="0"/> | 368 <param name="random_state" value="0"/> |
| 464 </conditional> | 369 </conditional> |
| 465 <conditional name="modify_anndata"> | |
| 466 <param name="modify_anndata" value="true"/> | |
| 467 <param name="anndata_output_format" value="h5ad" /> | |
| 468 </conditional> | |
| 469 <assert_stdout> | 370 <assert_stdout> |
| 470 <has_text_matching expression="sc.pp.subsample"/> | 371 <has_text_matching expression="sc.pp.subsample"/> |
| 471 <has_text_matching expression="n_obs=10"/> | 372 <has_text_matching expression="n_obs=10"/> |
| 472 <has_text_matching expression="random_state=0"/> | 373 <has_text_matching expression="random_state=0"/> |
| 473 </assert_stdout> | 374 </assert_stdout> |
| 474 <output name="anndata_out_h5ad" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5" compare="sim_size"/> | 375 <output name="anndata_out" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5ad" compare="sim_size"/> |
| 376 </test> | |
| 377 <test> | |
| 378 <!-- test 9 --> | |
| 379 <param name="adata" value="random-randint.h5ad" /> | |
| 380 <conditional name="method"> | |
| 381 <param name="method" value="pp.downsample_counts"/> | |
| 382 <param name="total_counts" value="20000"/> | |
| 383 <param name="random_state" value="0"/> | |
| 384 <param name="replace" value="false"/> | |
| 385 </conditional> | |
| 386 <assert_stdout> | |
| 387 <has_text_matching expression="sc.pp.downsample_counts"/> | |
| 388 <has_text_matching expression="total_counts=20000"/> | |
| 389 <has_text_matching expression="random_state=0"/> | |
| 390 <has_text_matching expression="replace=False"/> | |
| 391 </assert_stdout> | |
| 392 <output name="anndata_out" file="pp.downsample_counts.random-randint.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 475 </test> | 393 </test> |
| 476 </tests> | 394 </tests> |
| 477 <help><![CDATA[ | 395 <help><![CDATA[ |
| 478 | 396 |
| 479 Filter cell outliers based on counts and numbers of genes expressed (`pp.filter_cells`) | 397 Filter cell outliers based on counts and numbers of genes expressed (`pp.filter_cells`) |
| 485 | 403 |
| 486 Only provide one of the optional parameters `min_counts`, `min_genes`, | 404 Only provide one of the optional parameters `min_counts`, `min_genes`, |
| 487 `max_counts`, `max_genes` per call. | 405 `max_counts`, `max_genes` per call. |
| 488 | 406 |
| 489 More details on the `scanpy documentation | 407 More details on the `scanpy documentation |
| 490 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_cells.html#scanpy.api.pp.filter_cells>`__ | 408 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.filter_cells.html>`__ |
| 491 | |
| 492 Return | |
| 493 ------ | |
| 494 | |
| 495 number_per_cell : Number per cell (either `n_counts` or `n_genes` per cell) | |
| 496 | 409 |
| 497 | 410 |
| 498 Filter genes based on number of cells or counts (`pp.filter_genes`) | 411 Filter genes based on number of cells or counts (`pp.filter_genes`) |
| 499 =================================================================== | 412 =================================================================== |
| 500 | 413 |
| 504 | 417 |
| 505 Only provide one of the optional parameters `min_counts`, `min_cells`, | 418 Only provide one of the optional parameters `min_counts`, `min_cells`, |
| 506 `max_counts`, `max_cells` per call. | 419 `max_counts`, `max_cells` per call. |
| 507 | 420 |
| 508 More details on the `scanpy documentation | 421 More details on the `scanpy documentation |
| 509 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_genes.html#scanpy.api.pp.filter_genes>`__ | 422 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.filter_genes.html>`__ |
| 510 | 423 |
| 511 Return | 424 |
| 512 ------ | 425 Filters out genes based on fold change and fraction of cells expressing the gene within and outside the groupby categories (`tl.filter_rank_genes_groups`) |
| 513 | 426 ========================================================================================================================================================== |
| 514 number_per_gene : Number per genes (either `n_counts` or `n_genes` per cell) | |
| 515 | |
| 516 | |
| 517 Extract highly variable genes (`pp.filter_genes_dispersion`) | |
| 518 ============================================================ | |
| 519 | |
| 520 If trying out parameters, pass the data matrix instead of AnnData. | |
| 521 | |
| 522 Depending on `flavor`, this reproduces the R-implementations of Seurat and Cell Ranger. | |
| 523 | |
| 524 The normalized dispersion is obtained by scaling with the mean and standard | |
| 525 deviation of the dispersions for genes falling into a given bin for mean | |
| 526 expression of genes. This means that for each bin of mean expression, highly | |
| 527 variable genes are selected. | |
| 528 | |
| 529 Use `flavor='cell_ranger'` with care and in the same way as in `pp.recipe_zheng17`. | |
| 530 | 427 |
| 531 More details on the `scanpy documentation | 428 More details on the `scanpy documentation |
| 532 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_genes_dispersion.html#scanpy.api.pp.filter_genes_dispersion>`__ | 429 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.filter_rank_genes_groups.html>`__ |
| 533 | 430 |
| 534 Returns | 431 |
| 535 ------- | 432 Annotate highly variable genes (`pp.highly_variable_genes`) |
| 536 - The annotated matrix filtered, with the annotations | 433 =========================================================== |
| 537 - A table with the means, dispersions, and normalized dispersions per gene, logarithmized when `log` is `True`. | 434 |
| 435 It expects logarithmized data. | |
| 436 | |
| 437 Depending on flavor, this reproduces the R-implementations of Seurat or Cell Ranger. The normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for genes falling into a given bin for mean expression of genes. This means that for each bin of mean expression, highly variable genes are selected. | |
| 538 | 438 |
| 539 | 439 |
| 540 Subsample to a fraction of the number of observations (`pp.subsample`) | 440 Subsample to a fraction of the number of observations (`pp.subsample`) |
| 541 ====================================================================== | 441 ====================================================================== |
| 542 | 442 |
| 543 More details on the `scanpy documentation | 443 More details on the `scanpy documentation |
| 544 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.subsample.html#scanpy.api.pp.subsample>`__ | 444 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.subsample.html>`__ |
| 445 | |
| 446 Downsample counts (`pp.downsample_counts`) | |
| 447 ========================================== | |
| 448 | |
| 449 Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This | |
| 450 has been implemented by M. D. Luecken. | |
| 451 | |
| 452 More details on the `scanpy documentation | |
| 453 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.downsample_counts.html>`__ | |
| 545 | 454 |
| 546 | 455 |
| 547 ]]></help> | 456 ]]></help> |
| 548 <expand macro="citations"/> | 457 <expand macro="citations"/> |
| 549 </tool> | 458 </tool> |
