Mercurial > repos > iuc > episcanpy_preprocess
diff preprocess.xml @ 0:307f05e02a03 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/episcanpy/ commit ce8ee43d7285503a24c7b0f55c09c513be8c66f5
| author | iuc |
|---|---|
| date | Tue, 18 Apr 2023 13:17:48 +0000 |
| parents | |
| children | ed9b88a259f1 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/preprocess.xml Tue Apr 18 13:17:48 2023 +0000 @@ -0,0 +1,444 @@ +<tool id="episcanpy_preprocess" name="scATAC-seq Preprocessing" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>with EpiScanpy</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="bio_tools"/> + <expand macro="requirements"/> + <expand macro="version_command"/> + <command detect_errors="exit_code"><![CDATA[ +@CMD@ + ]]></command> + <configfiles> + <configfile name="script_file"><![CDATA[ +@CMD_imports@ +@CMD_read_inputs@ + +#if $method.method == 'pp.binarize' +esc.pp.binarize( + adata, + copy=False) + +#else if $method.method == 'pp.filter_cells' +esc.pp.filter_cells( + adata, + #if $method.filter.filter == 'min_counts' + min_counts=$method.filter.min_counts, + #else if $method.filter.filter == 'max_counts' + max_counts=$method.filter.max_counts, + #else if $method.filter.filter == 'min_features' + min_features=$method.filter.min_features, + #else if $method.filter.filter == 'max_features' + max_features=$method.filter.max_features, + #end if + copy=False) + +#else if $method.method == 'pp.filter_features' +esc.pp.filter_features( + adata, + #if $method.filter.filter == 'min_counts' + min_counts=$method.filter.min_counts, + #else if $method.filter.filter == 'max_counts' + max_counts=$method.filter.max_counts, + #else if $method.filter.filter == 'min_cells' + min_cells=$method.filter.min_cells, + #else if $method.filter.filter == 'max_cells' + max_cells=$method.filter.max_cells, + #end if + copy=False) + +#else if $method.method == 'nb_feat_log' +adata.obs['log_nb_features'] = [np.log10(x) for x in adata.obs['nb_features']] + +#else if $method.method == 'pp.coverage_cells' +esc.pp.coverage_cells( + adata, + binary=$method.binary, + log=$method.log, + #if $method.threshold + threshold=$method.threshold, + #end if + bins=$method.bins, + save='plot.png' + ) +#else if $method.method == 'pp.coverage_features' +esc.pp.coverage_features( + adata, + binary=$method.binary, + log=$method.log, + #if $method.threshold + threshold=$method.threshold, + #end if + bins=$method.bins, + save='plot.png' + ) + +#else if $method.method == 'pp.select_var_feature' +esc.pp.select_var_feature( + adata, + min_score=$method.min_score, + show=False, + #if $method.nb_features + nb_features=$method.nb_features + #end if +) + +#else if $method.method == 'pp.cal_var' +esc.pp.cal_var( + adata, + show=True, + color=['b', 'r'], + save='plot.png' +) + +#else if $method.method == 'pp.variability_features' +esc.pp.variability_features( + adata, + min_score=$method.min_score, + nb_features=$method.nb_features, + #if $method.log_mode + log='$method.log_mode', + #end if + save='plot.png' +) + +#end if +@CMD_anndata_write_outputs@ +]]></configfile> + </configfiles> + <inputs> + <expand macro="inputs_anndata"/> + <conditional name="method"> + <param argument="method" type="select" label="Method used for filtering"> + <option value="pp.binarize">Binarize count matrix, using 'pp.binarize'</option> + <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of features expressed, using 'pp.filter_cells'</option> + <option value="pp.filter_features">Filter features based on counts and numbers of features expressed, using 'pp.filter_features'</option> + <option value="nb_feat_log">Compute log10 of nb_features</option> + <option value="pp.coverage_cells">Coverage cells: Histogram of the number of open features (in the case of ATAC-seq data) per cell, using 'pp.coverage_cells'</option> + <option value="pp.coverage_features">Coverage features: Distribution of the feature commoness in cells, using 'pp.coverage_features'</option> + <option value="pp.select_var_feature">Select the most variable features, 'pp.select_var_feature'</option> + <option value="pp.cal_var">Show distribution plots of cells sharing features and variability score 'pp.cal_var'</option> + <option value="pp.variability_features">Computes variability score to rank the most variable features across all cells, using 'pp.variability_features'</option> + </param> + <when value="pp.binarize" /> + <when value="pp.filter_cells"> + <conditional name="filter"> + <param argument="filter" type="select" label="Filter" help="Filter mode"> + <option value="min_counts">Minimum number of counts</option> + <option value="max_counts">Maximum number of counts</option> + <option value="min_features">Minimum number of features expressed</option> + <option value="max_features">Maximum number of features expressed</option> + </param> + <when value="min_counts"> + <param argument="min_counts" type="integer" min="0" value="" label="Minimum counts" help="Minimum number of counts required for a cell to pass filtering"/> + </when> + <when value="max_counts"> + <param argument="max_counts" type="integer" min="0" value="" label="Maximum counts" help="Maximum number of counts required for a cell to pass filtering"/> + </when> + <when value="min_features"> + <param argument="min_features" type="integer" min="0" value="" label="Minimum features" help="Minimum number of features expressed required for a cell to pass filtering"/> + </when> + <when value="max_features"> + <param argument="max_features" type="integer" min="0" value="" label="Maximum features" help="Maximum number of features expressed required for a cell to pass filtering"/> + </when> + </conditional> + </when> + <when value="pp.filter_features"> + <conditional name="filter"> + <param argument="filter" type="select" label="Filter"> + <option value="min_counts">Minimum number of counts</option> + <option value="max_counts">Maximum number of counts</option> + <option value="min_cells">Minimum number of cells expressed</option> + <option value="max_cells">Maximum number of cells expressed</option> + </param> + <when value="min_counts"> + <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering"/> + </when> + <when value="max_counts"> + <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering"/> + </when> + <when value="min_cells"> + <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering"/> + </when> + <when value="max_cells"> + <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering"/> + </when> + </conditional> + </when> + <when value="nb_feat_log" /> + <when value="pp.coverage_cells"> + <expand macro="coverage_params" /> + </when> + <when value="pp.coverage_features"> + <expand macro="coverage_params" /> + </when> + <when value="pp.select_var_feature"> + <param argument="min_score" type="float" min="0" max="1" value="0.5" label="Min score" help="Minimum threshold variability score to retain features" /> + <param argument="nb_features" type="integer" min="0" value="" optional="True" label="Number of features" help="Default value is None, if specify it will select a the top most + variable features. If this parameter is larger than the total number of feature, it filters based on the min_score argument." /> + </when> + <when value="pp.cal_var" /> + <when value="pp.variability_features"> + <param name="min_score" type="float" min="0" max="1" value="0.5" label="Minimum score value"/> + <param name="nb_features" type="integer" min="0" value="" label="Number of features"/> + <param name="log_mode" type="select" optional="True" label="Log" help="Log mode"> + <option value="log2">Log2</option> + <option value="log10">Log10</option> + </param> + </when> + </conditional> + <expand macro="inputs_common_advanced"/> + </inputs> + <outputs> + <expand macro="anndata_outputs"/> + <data name="out_png" format="png" from_work_dir="plot.png" label="${tool.name} (${method.method}) on ${on_string}"> + <filter>method['method'] != 'pp.binarize' and method['method'] != 'pp.filter_cells' and method['method'] != 'pp.filter_features' and method['method'] != 'nb_feat_log' and method['method'] != 'select_var_feature'</filter> + </data> + </outputs> + <tests> + <test expect_num_outputs="1"> + <!-- pp.binarize --> + <param name="adata" value="krumsiek11.h5ad" /> + <conditional name="method"> + <param name="method" value="pp.binarize"/> + </conditional> + <output name="anndata_out" file="krumsiek11.pp.binarize.h5ad" ftype="h5ad" compare="sim_size"/> + </test> + <test expect_num_outputs="2"> + <!-- pp.filter_cells --> + <param name="adata" value="krumsiek11.h5ad" /> + <conditional name="method"> + <param name="method" value="pp.filter_cells"/> + <conditional name="filter"> + <param name="filter" value="min_features"/> + <param name="min_features" value="10"/> + </conditional> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <assert_stdout> + <has_text_matching expression="395 × 11"/> + </assert_stdout> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="esc.pp.filter_cells"/> + <has_text_matching expression="min_features=10"/> + </assert_contents> + </output> + <output name="anndata_out" file="krumsiek11.pp.filter_cells.min_features.h5ad" ftype="h5ad" compare="sim_size"/> + </test> + <test expect_num_outputs="2"> + <!-- pp.filter_features --> + <param name="adata" value="krumsiek11.h5ad" /> + <conditional name="method"> + <param name="method" value="pp.filter_features"/> + <conditional name="filter"> + <param name="filter" value="min_cells"/> + <param name="min_cells" value="600"/> + </conditional> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <assert_stdout> + <has_text_matching expression="640 × 2"/> + </assert_stdout> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="esc.pp.filter_features"/> + <has_text_matching expression="min_cells=600"/> + </assert_contents> + </output> + <output name="anndata_out" file="krumsiek11.pp.filter_features.min_cells.h5ad" ftype="h5ad" compare="sim_size"/> + </test> + <test expect_num_outputs="1"> + <!-- nb_feat_log --> + <param name="adata" value="krumsiek11.pp.filter_cells.min_features.h5ad" /> + <conditional name="method"> + <param name="method" value="nb_feat_log"/> + </conditional> + <assert_stdout> + <has_text_matching expression="log_nb_features"/> + <has_text_matching expression="nb_features"/> + </assert_stdout> + </test> + <test expect_num_outputs="3"> + <!-- pp.select_var_feature --> + <param name="adata" value="krumsiek11.h5ad" /> + <conditional name="method"> + <param name="method" value="pp.select_var_feature"/> + <param name="min_score" value="0.6"/> + <param name="nb_features" value="10"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <assert_stdout> + <has_text_matching expression="prop_shared_cells"/> + <has_text_matching expression="variability_score"/> + </assert_stdout> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="esc.pp.select_var_feature"/> + <has_text_matching expression="adata"/> + <has_text_matching expression="min_score=0.6"/> + <has_text_matching expression="nb_features=10"/> + </assert_contents> + </output> + <output name="anndata_out" file="krumsiek11.pp.select_var_feature.h5ad" ftype="h5ad" compare="sim_size"/> + </test> + <test expect_num_outputs="3"> + <!-- pp.cal_var --> + <param name="adata" value="krumsiek11.h5ad" /> + <conditional name="method"> + <param name="method" value="pp.cal_var"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="esc.pp.cal_var"/> + <has_text_matching expression="adata"/> + <has_text_matching expression="plot.png"/> + </assert_contents> + </output> + <output name="anndata_out" file="krumsiek11.pp.cal_var.h5ad" ftype="h5ad" compare="sim_size"/> + <output name="out_png" file="krumsiek11.pp.cal_var.png" ftype="png" compare="sim_size"/> + </test> + <test expect_num_outputs="3"> + <!-- pp.coverage_cells --> + <param name="adata" value="krumsiek11.h5ad" /> + <conditional name="method"> + <param name="method" value="pp.coverage_cells"/> + <param name="threshold" value="3.0" /> + <param name="binary" value="True" /> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="esc.pp.coverage_cells"/> + <has_text_matching expression="adata"/> + <has_text_matching expression="threshold=3.0"/> + </assert_contents> + </output> + <output name="anndata_out" file="krumsiek11.pp.coverage_cells.h5ad" ftype="h5ad" compare="sim_size"/> + <output name="out_png" file="krumsiek11.pp.coverage_cells.png" ftype="png" compare="sim_size"/> + </test> + <test expect_num_outputs="3"> + <!-- pp.coverage_features --> + <param name="adata" value="krumsiek11.h5ad" /> + <conditional name="method"> + <param name="method" value="pp.coverage_features"/> + <param name="threshold" value="100" /> + <param name="binary" value="True" /> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="esc.pp.coverage_features"/> + <has_text_matching expression="adata"/> + <has_text_matching expression="threshold=100"/> + </assert_contents> + </output> + <output name="anndata_out" file="krumsiek11.pp.coverage_features.h5ad" ftype="h5ad" compare="sim_size"/> + <output name="out_png" file="krumsiek11.pp.coverage_features.png" ftype="png" compare="sim_size"/> + </test> + <test expect_num_outputs="3"> + <!-- pp.coverage_features --> + <param name="adata" value="krumsiek11.h5ad" /> + <conditional name="method"> + <param name="method" value="pp.variability_features"/> + <param name="min_score" value="0.75" /> + <param name="nb_features" value="8" /> + <param name="log" value="log10" /> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="esc.pp.variability_features"/> + <has_text_matching expression="adata"/> + <has_text_matching expression="min_score=0.75"/> + <has_text_matching expression="nb_features=8"/> + </assert_contents> + </output> + <output name="anndata_out" file="krumsiek11.pp.variability_features.h5ad" ftype="h5ad" compare="sim_size"/> + <output name="out_png" file="krumsiek11.pp.variability_features.png" ftype="png" compare="sim_size"/> + </test> + </tests> + <help><![CDATA[ + +convert the count matrix into a binary matrix (`pp.binarize`) +============================================================================================ +convert the count matrix into a binary matrix + +More details on the `episcanpy documentation +<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.binarize.html>`__ + +Filter cells outliers based on counts and numbers of features expressed (`pp.filter_cells`) +============================================================================================ +For instance, only keep cells with at least *min_counts* counts or *min_features* genes expressed. +This is to filter measurement outliers, i.e. "unreliable" observations. + +Only provide one of the optional parameters *min_counts*, *min_features*, *max_counts*, *max_features* per call. + +More details on the `episcanpy documentation +<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.filter_cells.html>`__ + +Filter features based on number of cells or counts (`pp.filter_features`) +======================================================================================== +Keep features that have at least *min_counts* counts or are expressed in at least *min_cells* cells or +have at most *max_counts* counts or are expressed in at most *max_cells* cells. + +Only provide one of the optional parameters *min_counts*, *min_cells*, *max_counts*, *max_cells* per call. + +More details on the `episcanpy documentation +<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.filter_features.html>`__ + +Histogram of the number of open features (`pp.coverage_cells`) +======================================================================================== +Histogram of the number of open features (in the case of ATAC-seq data) per cell. + +More details on the `episcanpy documentation +<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.coverage_cells.html>`__ + +Distribution of the feature commoness in cells (`pp.coverage_features`) +======================================================================================== +Display how often a feature is measured as open (for ATAC-seq). Distribution of the feature commoness in cells. + +More details on the `episcanpy documentation +<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.coverage_features.html>`__ + +Selects the most variable features according to either a specified number of features or minimum variance score (`pp.select_var_feature`) +========================================================================================================================================= + +This function computes a variability score to rank the most variable features across all cells. Then it selects the most variable features according to either a specified number of features (nb_features) or a minimum variance score (min_score). + +More details on the `episcanpy documentation +<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.select_var_feature.html>`__ + +Distribution of cells sharing features and variability score (`pp.cal_var`) +============================================================================= + +Show distribution plots of cells sharing features and variability score. + +More details on the `episcanpy documentation +<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.cal_var.html>`__ + +Compute a variability score to rank the most variable features across all cells (`pp.variability_features`) +============================================================================================================ + +This function computes a variability score to rank the most variable features across all cells. Then it selects the most variable features according to either a specified number of features (nb_features) or a minimum variance score (min_score). + +More details on the `episcanpy documentation +<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.variability_features.html>`__ + ]]></help> + <expand macro="citations"/> +</tool>
