Mercurial > repos > iuc > episcanpy_preprocess
comparison preprocess.xml @ 0:307f05e02a03 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/episcanpy/ commit ce8ee43d7285503a24c7b0f55c09c513be8c66f5
| author | iuc |
|---|---|
| date | Tue, 18 Apr 2023 13:17:48 +0000 |
| parents | |
| children | ed9b88a259f1 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:307f05e02a03 |
|---|---|
| 1 <tool id="episcanpy_preprocess" name="scATAC-seq Preprocessing" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>with EpiScanpy</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="bio_tools"/> | |
| 7 <expand macro="requirements"/> | |
| 8 <expand macro="version_command"/> | |
| 9 <command detect_errors="exit_code"><![CDATA[ | |
| 10 @CMD@ | |
| 11 ]]></command> | |
| 12 <configfiles> | |
| 13 <configfile name="script_file"><![CDATA[ | |
| 14 @CMD_imports@ | |
| 15 @CMD_read_inputs@ | |
| 16 ## Emit exactly one preprocessing step, chosen by the selected method. | |
| 17 #if $method.method == 'pp.binarize' | |
| 18 esc.pp.binarize( | |
| 19 adata, | |
| 20 copy=False) | |
| 21 | |
| 22 #else if $method.method == 'pp.filter_cells' | |
| 23 esc.pp.filter_cells( | |
| 24 adata, | |
| 25 #if $method.filter.filter == 'min_counts' | |
| 26 min_counts=$method.filter.min_counts, | |
| 27 #else if $method.filter.filter == 'max_counts' | |
| 28 max_counts=$method.filter.max_counts, | |
| 29 #else if $method.filter.filter == 'min_features' | |
| 30 min_features=$method.filter.min_features, | |
| 31 #else if $method.filter.filter == 'max_features' | |
| 32 max_features=$method.filter.max_features, | |
| 33 #end if | |
| 34 copy=False) | |
| 35 | |
| 36 #else if $method.method == 'pp.filter_features' | |
| 37 esc.pp.filter_features( | |
| 38 adata, | |
| 39 #if $method.filter.filter == 'min_counts' | |
| 40 min_counts=$method.filter.min_counts, | |
| 41 #else if $method.filter.filter == 'max_counts' | |
| 42 max_counts=$method.filter.max_counts, | |
| 43 #else if $method.filter.filter == 'min_cells' | |
| 44 min_cells=$method.filter.min_cells, | |
| 45 #else if $method.filter.filter == 'max_cells' | |
| 46 max_cells=$method.filter.max_cells, | |
| 47 #end if | |
| 48 copy=False) | |
| 49 | |
| 50 #else if $method.method == 'nb_feat_log' | |
| 51 adata.obs['log_nb_features'] = np.log10(adata.obs['nb_features']) | |
| 52 ## Optional numeric inputs below are tested via str() so that an explicit 0 is still passed on. | |
| 53 #else if $method.method == 'pp.coverage_cells' | |
| 54 esc.pp.coverage_cells( | |
| 55 adata, | |
| 56 binary=$method.binary, | |
| 57 log=$method.log, | |
| 58 #if str($method.threshold) not in ('', 'None') | |
| 59 threshold=$method.threshold, | |
| 60 #end if | |
| 61 bins=$method.bins, | |
| 62 save='plot.png' | |
| 63 ) | |
| 64 #else if $method.method == 'pp.coverage_features' | |
| 65 esc.pp.coverage_features( | |
| 66 adata, | |
| 67 binary=$method.binary, | |
| 68 log=$method.log, | |
| 69 #if str($method.threshold) not in ('', 'None') | |
| 70 threshold=$method.threshold, | |
| 71 #end if | |
| 72 bins=$method.bins, | |
| 73 save='plot.png' | |
| 74 ) | |
| 75 | |
| 76 #else if $method.method == 'pp.select_var_feature' | |
| 77 esc.pp.select_var_feature( | |
| 78 adata, | |
| 79 min_score=$method.min_score, | |
| 80 show=False, | |
| 81 #if str($method.nb_features) not in ('', 'None') | |
| 82 nb_features=$method.nb_features | |
| 83 #end if | |
| 84 ) | |
| 85 | |
| 86 #else if $method.method == 'pp.cal_var' | |
| 87 esc.pp.cal_var( | |
| 88 adata, | |
| 89 show=True, | |
| 90 color=['b', 'r'], | |
| 91 save='plot.png' | |
| 92 ) | |
| 93 | |
| 94 #else if $method.method == 'pp.variability_features' | |
| 95 esc.pp.variability_features( | |
| 96 adata, | |
| 97 min_score=$method.min_score, | |
| 98 nb_features=$method.nb_features, | |
| 99 #if $method.log_mode | |
| 100 log='$method.log_mode', | |
| 101 #end if | |
| 102 save='plot.png' | |
| 103 ) | |
| 104 | |
| 105 #end if | |
| 106 @CMD_anndata_write_outputs@ | |
| 107 ]]></configfile> | |
| 108 </configfiles> | |
| 109 <inputs> | |
| 110 <expand macro="inputs_anndata"/> | |
| 111 <conditional name="method"> <!-- every option value below needs a matching branch in the script template and a matching <when> --> | |
| 112 <param argument="method" type="select" label="Method used for filtering"> | |
| 113 <option value="pp.binarize">Binarize count matrix, using 'pp.binarize'</option> | |
| 114 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of features expressed, using 'pp.filter_cells'</option> | |
| 115 <option value="pp.filter_features">Filter features based on number of cells or counts, using 'pp.filter_features'</option> | |
| 116 <option value="nb_feat_log">Compute log10 of nb_features</option> | |
| 117 <option value="pp.coverage_cells">Coverage cells: Histogram of the number of open features (in the case of ATAC-seq data) per cell, using 'pp.coverage_cells'</option> | |
| 118 <option value="pp.coverage_features">Coverage features: Distribution of the feature commonness in cells, using 'pp.coverage_features'</option> | |
| 119 <option value="pp.select_var_feature">Select the most variable features, using 'pp.select_var_feature'</option> | |
| 120 <option value="pp.cal_var">Show distribution plots of cells sharing features and variability score, using 'pp.cal_var'</option> | |
| 121 <option value="pp.variability_features">Computes variability score to rank the most variable features across all cells, using 'pp.variability_features'</option> | |
| 122 </param> | |
| 123 <when value="pp.binarize" /> | |
| 124 <when value="pp.filter_cells"> | |
| 125 <conditional name="filter"> | |
| 126 <param argument="filter" type="select" label="Filter" help="Filter mode"> | |
| 127 <option value="min_counts">Minimum number of counts</option> | |
| 128 <option value="max_counts">Maximum number of counts</option> | |
| 129 <option value="min_features">Minimum number of features expressed</option> | |
| 130 <option value="max_features">Maximum number of features expressed</option> | |
| 131 </param> | |
| 132 <when value="min_counts"> | |
| 133 <param argument="min_counts" type="integer" min="0" value="" label="Minimum counts" help="Minimum number of counts required for a cell to pass filtering"/> | |
| 134 </when> | |
| 135 <when value="max_counts"> | |
| 136 <param argument="max_counts" type="integer" min="0" value="" label="Maximum counts" help="Maximum number of counts allowed for a cell to pass filtering"/> | |
| 137 </when> | |
| 138 <when value="min_features"> | |
| 139 <param argument="min_features" type="integer" min="0" value="" label="Minimum features" help="Minimum number of features expressed required for a cell to pass filtering"/> | |
| 140 </when> | |
| 141 <when value="max_features"> | |
| 142 <param argument="max_features" type="integer" min="0" value="" label="Maximum features" help="Maximum number of features expressed allowed for a cell to pass filtering"/> | |
| 143 </when> | |
| 144 </conditional> | |
| 145 </when> | |
| 146 <when value="pp.filter_features"> | |
| 147 <conditional name="filter"> | |
| 148 <param argument="filter" type="select" label="Filter"> | |
| 149 <option value="min_counts">Minimum number of counts</option> | |
| 150 <option value="max_counts">Maximum number of counts</option> | |
| 151 <option value="min_cells">Minimum number of cells expressed</option> | |
| 152 <option value="max_cells">Maximum number of cells expressed</option> | |
| 153 </param> | |
| 154 <when value="min_counts"> | |
| 155 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a feature to pass filtering"/> | |
| 156 </when> | |
| 157 <when value="max_counts"> | |
| 158 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts allowed for a feature to pass filtering"/> | |
| 159 </when> | |
| 160 <when value="min_cells"> | |
| 161 <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a feature to pass filtering"/> | |
| 162 </when> | |
| 163 <when value="max_cells"> | |
| 164 <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed allowed for a feature to pass filtering"/> | |
| 165 </when> | |
| 166 </conditional> | |
| 167 </when> | |
| 168 <when value="nb_feat_log" /> | |
| 169 <when value="pp.coverage_cells"> | |
| 170 <expand macro="coverage_params" /> | |
| 171 </when> | |
| 172 <when value="pp.coverage_features"> | |
| 173 <expand macro="coverage_params" /> | |
| 174 </when> | |
| 175 <when value="pp.select_var_feature"> | |
| 176 <param argument="min_score" type="float" min="0" max="1" value="0.5" label="Min score" help="Minimum threshold variability score to retain features" /> | |
| 177 <param argument="nb_features" type="integer" min="0" value="" optional="True" label="Number of features" help="Default value is None. If specified, the top most | |
| 178 variable features are selected. If this parameter is larger than the total number of features, filtering is based on the min_score argument." /> | |
| 179 </when> | |
| 180 <when value="pp.cal_var" /> | |
| 181 <when value="pp.variability_features"> | |
| 182 <param argument="min_score" type="float" min="0" max="1" value="0.5" label="Minimum score value"/> | |
| 183 <param argument="nb_features" type="integer" min="0" value="" label="Number of features"/> | |
| 184 <param name="log_mode" type="select" optional="True" label="Log" help="Log mode"> <!-- named log_mode, not log; the script template passes it on as log=... --> | |
| 185 <option value="log2">Log2</option> | |
| 186 <option value="log10">Log10</option> | |
| 187 </param> | |
| 188 </when> | |
| 189 </conditional> | |
| 190 <expand macro="inputs_common_advanced"/> | |
| 191 </inputs> | |
| 192 <outputs> | |
| 193 <expand macro="anndata_outputs"/> | |
| 194 <data name="out_png" format="png" from_work_dir="plot.png" label="${tool.name} (${method.method}) on ${on_string}"> <!-- only declared for the methods whose script branch passes save='plot.png' --> | |
| 195 <filter>method['method'] in ('pp.coverage_cells', 'pp.coverage_features', 'pp.cal_var', 'pp.variability_features')</filter> | |
| 196 </data> | |
| 197 </outputs> | |
| 198 <tests> | |
| 199 <test expect_num_outputs="1"> | |
| 200 <!-- pp.binarize --> | |
| 201 <param name="adata" value="krumsiek11.h5ad" /> | |
| 202 <conditional name="method"> | |
| 203 <param name="method" value="pp.binarize"/> | |
| 204 </conditional> | |
| 205 <output name="anndata_out" file="krumsiek11.pp.binarize.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 206 </test> | |
| 207 <test expect_num_outputs="2"> | |
| 208 <!-- pp.filter_cells --> | |
| 209 <param name="adata" value="krumsiek11.h5ad" /> | |
| 210 <conditional name="method"> | |
| 211 <param name="method" value="pp.filter_cells"/> | |
| 212 <conditional name="filter"> | |
| 213 <param name="filter" value="min_features"/> | |
| 214 <param name="min_features" value="10"/> | |
| 215 </conditional> | |
| 216 </conditional> | |
| 217 <section name="advanced_common"> | |
| 218 <param name="show_log" value="true" /> | |
| 219 </section> | |
| 220 <assert_stdout> | |
| 221 <has_text_matching expression="395 × 11"/> | |
| 222 </assert_stdout> | |
| 223 <output name="hidden_output"> | |
| 224 <assert_contents> | |
| 225 <has_text_matching expression="esc.pp.filter_cells"/> | |
| 226 <has_text_matching expression="min_features=10"/> | |
| 227 </assert_contents> | |
| 228 </output> | |
| 229 <output name="anndata_out" file="krumsiek11.pp.filter_cells.min_features.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 230 </test> | |
| 231 <test expect_num_outputs="2"> | |
| 232 <!-- pp.filter_features --> | |
| 233 <param name="adata" value="krumsiek11.h5ad" /> | |
| 234 <conditional name="method"> | |
| 235 <param name="method" value="pp.filter_features"/> | |
| 236 <conditional name="filter"> | |
| 237 <param name="filter" value="min_cells"/> | |
| 238 <param name="min_cells" value="600"/> | |
| 239 </conditional> | |
| 240 </conditional> | |
| 241 <section name="advanced_common"> | |
| 242 <param name="show_log" value="true" /> | |
| 243 </section> | |
| 244 <assert_stdout> | |
| 245 <has_text_matching expression="640 × 2"/> | |
| 246 </assert_stdout> | |
| 247 <output name="hidden_output"> | |
| 248 <assert_contents> | |
| 249 <has_text_matching expression="esc.pp.filter_features"/> | |
| 250 <has_text_matching expression="min_cells=600"/> | |
| 251 </assert_contents> | |
| 252 </output> | |
| 253 <output name="anndata_out" file="krumsiek11.pp.filter_features.min_cells.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 254 </test> | |
| 255 <test expect_num_outputs="1"> | |
| 256 <!-- nb_feat_log --> | |
| 257 <param name="adata" value="krumsiek11.pp.filter_cells.min_features.h5ad" /> | |
| 258 <conditional name="method"> | |
| 259 <param name="method" value="nb_feat_log"/> | |
| 260 </conditional> | |
| 261 <assert_stdout> | |
| 262 <has_text_matching expression="log_nb_features"/> | |
| 263 <has_text_matching expression="nb_features"/> | |
| 264 </assert_stdout> | |
| 265 </test> | |
| 266 <test expect_num_outputs="2"> | |
| 267 <!-- pp.select_var_feature: no plot is saved, so only the AnnData and the log are expected --> | |
| 268 <param name="adata" value="krumsiek11.h5ad" /> | |
| 269 <conditional name="method"> | |
| 270 <param name="method" value="pp.select_var_feature"/> | |
| 271 <param name="min_score" value="0.6"/> | |
| 272 <param name="nb_features" value="10"/> | |
| 273 </conditional> | |
| 274 <section name="advanced_common"> | |
| 275 <param name="show_log" value="true" /> | |
| 276 </section> | |
| 277 <assert_stdout> | |
| 278 <has_text_matching expression="prop_shared_cells"/> | |
| 279 <has_text_matching expression="variability_score"/> | |
| 280 </assert_stdout> | |
| 281 <output name="hidden_output"> | |
| 282 <assert_contents> | |
| 283 <has_text_matching expression="esc.pp.select_var_feature"/> | |
| 284 <has_text_matching expression="adata"/> | |
| 285 <has_text_matching expression="min_score=0.6"/> | |
| 286 <has_text_matching expression="nb_features=10"/> | |
| 287 </assert_contents> | |
| 288 </output> | |
| 289 <output name="anndata_out" file="krumsiek11.pp.select_var_feature.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 290 </test> | |
| 291 <test expect_num_outputs="3"> | |
| 292 <!-- pp.cal_var --> | |
| 293 <param name="adata" value="krumsiek11.h5ad" /> | |
| 294 <conditional name="method"> | |
| 295 <param name="method" value="pp.cal_var"/> | |
| 296 </conditional> | |
| 297 <section name="advanced_common"> | |
| 298 <param name="show_log" value="true" /> | |
| 299 </section> | |
| 300 <output name="hidden_output"> | |
| 301 <assert_contents> | |
| 302 <has_text_matching expression="esc.pp.cal_var"/> | |
| 303 <has_text_matching expression="adata"/> | |
| 304 <has_text_matching expression="plot.png"/> | |
| 305 </assert_contents> | |
| 306 </output> | |
| 307 <output name="anndata_out" file="krumsiek11.pp.cal_var.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 308 <output name="out_png" file="krumsiek11.pp.cal_var.png" ftype="png" compare="sim_size"/> | |
| 309 </test> | |
| 310 <test expect_num_outputs="3"> | |
| 311 <!-- pp.coverage_cells --> | |
| 312 <param name="adata" value="krumsiek11.h5ad" /> | |
| 313 <conditional name="method"> | |
| 314 <param name="method" value="pp.coverage_cells"/> | |
| 315 <param name="threshold" value="3.0" /> | |
| 316 <param name="binary" value="True" /> | |
| 317 </conditional> | |
| 318 <section name="advanced_common"> | |
| 319 <param name="show_log" value="true" /> | |
| 320 </section> | |
| 321 <output name="hidden_output"> | |
| 322 <assert_contents> | |
| 323 <has_text_matching expression="esc.pp.coverage_cells"/> | |
| 324 <has_text_matching expression="adata"/> | |
| 325 <has_text_matching expression="threshold=3.0"/> | |
| 326 </assert_contents> | |
| 327 </output> | |
| 328 <output name="anndata_out" file="krumsiek11.pp.coverage_cells.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 329 <output name="out_png" file="krumsiek11.pp.coverage_cells.png" ftype="png" compare="sim_size"/> | |
| 330 </test> | |
| 331 <test expect_num_outputs="3"> | |
| 332 <!-- pp.coverage_features --> | |
| 333 <param name="adata" value="krumsiek11.h5ad" /> | |
| 334 <conditional name="method"> | |
| 335 <param name="method" value="pp.coverage_features"/> | |
| 336 <param name="threshold" value="100" /> | |
| 337 <param name="binary" value="True" /> | |
| 338 </conditional> | |
| 339 <section name="advanced_common"> | |
| 340 <param name="show_log" value="true" /> | |
| 341 </section> | |
| 342 <output name="hidden_output"> | |
| 343 <assert_contents> | |
| 344 <has_text_matching expression="esc.pp.coverage_features"/> | |
| 345 <has_text_matching expression="adata"/> | |
| 346 <has_text_matching expression="threshold=100"/> | |
| 347 </assert_contents> | |
| 348 </output> | |
| 349 <output name="anndata_out" file="krumsiek11.pp.coverage_features.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 350 <output name="out_png" file="krumsiek11.pp.coverage_features.png" ftype="png" compare="sim_size"/> | |
| 351 </test> | |
| 352 <test expect_num_outputs="3"> | |
| 353 <!-- pp.variability_features --> | |
| 354 <param name="adata" value="krumsiek11.h5ad" /> | |
| 355 <conditional name="method"> | |
| 356 <param name="method" value="pp.variability_features"/> | |
| 357 <param name="min_score" value="0.75" /> | |
| 358 <param name="nb_features" value="8" /> | |
| 359 <param name="log_mode" value="log10" /> | |
| 360 </conditional> | |
| 361 <section name="advanced_common"> | |
| 362 <param name="show_log" value="true" /> | |
| 363 </section> | |
| 364 <output name="hidden_output"> | |
| 365 <assert_contents> | |
| 366 <has_text_matching expression="esc.pp.variability_features"/> | |
| 367 <has_text_matching expression="adata"/> | |
| 368 <has_text_matching expression="min_score=0.75"/> | |
| 369 <has_text_matching expression="nb_features=8"/> | |
| 370 </assert_contents> | |
| 371 </output> | |
| 372 <output name="anndata_out" file="krumsiek11.pp.variability_features.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 373 <output name="out_png" file="krumsiek11.pp.variability_features.png" ftype="png" compare="sim_size"/> | |
| 374 </test> | |
| 375 </tests> | |
| 376 <help><![CDATA[ | |
| 377 | |
| 378 Convert the count matrix into a binary matrix (`pp.binarize`) | |
| 379 ============================================================================================ | |
| 380 Convert the count matrix into a binary matrix. | |
| 381 | |
| 382 More details on the `episcanpy documentation | |
| 383 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.binarize.html>`__ | |
| 384 | |
| 385 Filter cell outliers based on counts and numbers of features expressed (`pp.filter_cells`) | |
| 386 ============================================================================================ | |
| 387 For instance, only keep cells with at least *min_counts* counts or *min_features* features expressed. | |
| 388 This is to filter measurement outliers, i.e. "unreliable" observations. | |
| 389 | |
| 390 Only provide one of the optional parameters *min_counts*, *min_features*, *max_counts*, *max_features* per call. | |
| 391 | |
| 392 More details on the `episcanpy documentation | |
| 393 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.filter_cells.html>`__ | |
| 394 | |
| 395 Filter features based on number of cells or counts (`pp.filter_features`) | |
| 396 ======================================================================================== | |
| 397 Keep features that have at least *min_counts* counts or are expressed in at least *min_cells* cells or | |
| 398 have at most *max_counts* counts or are expressed in at most *max_cells* cells. | |
| 399 | |
| 400 Only provide one of the optional parameters *min_counts*, *min_cells*, *max_counts*, *max_cells* per call. | |
| 401 | |
| 402 More details on the `episcanpy documentation | |
| 403 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.filter_features.html>`__ | |
| 404 | |
| 405 Histogram of the number of open features (`pp.coverage_cells`) | |
| 406 ======================================================================================== | |
| 407 Histogram of the number of open features (in the case of ATAC-seq data) per cell. | |
| 408 | |
| 409 More details on the `episcanpy documentation | |
| 410 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.coverage_cells.html>`__ | |
| 411 | |
| 412 Distribution of the feature commonness in cells (`pp.coverage_features`) | |
| 413 ======================================================================================== | |
| 414 Display how often a feature is measured as open (for ATAC-seq). Distribution of the feature commonness in cells. | |
| 415 | |
| 416 More details on the `episcanpy documentation | |
| 417 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.coverage_features.html>`__ | |
| 418 | |
| 419 Selects the most variable features according to either a specified number of features or minimum variance score (`pp.select_var_feature`) | |
| 420 ========================================================================================================================================= | |
| 421 | |
| 422 This function computes a variability score to rank the most variable features across all cells. Then it selects the most variable features according to either a specified number of features (nb_features) or a minimum variance score (min_score). | |
| 423 | |
| 424 More details on the `episcanpy documentation | |
| 425 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.select_var_feature.html>`__ | |
| 426 | |
| 427 Distribution of cells sharing features and variability score (`pp.cal_var`) | |
| 428 ============================================================================= | |
| 429 | |
| 430 Show distribution plots of cells sharing features and variability score. | |
| 431 | |
| 432 More details on the `episcanpy documentation | |
| 433 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.cal_var.html>`__ | |
| 434 | |
| 435 Compute a variability score to rank the most variable features across all cells (`pp.variability_features`) | |
| 436 ============================================================================================================ | |
| 437 | |
| 438 This function computes a variability score to rank the most variable features across all cells. Then it selects the most variable features according to either a specified number of features (nb_features) or a minimum variance score (min_score). | |
| 439 | |
| 440 More details on the `episcanpy documentation | |
| 441 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.variability_features.html>`__ | |
| 442 ]]></help> | |
| 443 <expand macro="citations"/> | |
| 444 </tool> |
