Mercurial > repos > iuc > episcanpy_preprocess
view preprocess.xml @ 1:ed9b88a259f1 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/episcanpy/ commit bb79cb8cad3bc1433bff7caf9d7b45e7993dd470
| author | iuc |
|---|---|
| date | Sat, 22 Apr 2023 12:12:40 +0000 |
| parents | 307f05e02a03 |
| children |
line wrap: on
line source
<tool id="episcanpy_preprocess" name="scATAC-seq Preprocessing" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!--
        Galaxy wrapper around a selection of EpiScanpy preprocessing functions.
        The user picks one method in the "method" conditional; the configfile below
        renders a Python script that applies that single method to the input AnnData.
        Tokens of the form @NAME@ and all <expand macro="..."/> elements are defined in macros.xml.
    -->
    <description>with EpiScanpy</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <command detect_errors="exit_code"><![CDATA[
@CMD@
    ]]></command>
    <configfiles>
        <!-- Cheetah template of the Python script; exactly one branch is rendered, selected by $method.method. -->
        <configfile name="script_file"><![CDATA[
@CMD_imports@
@CMD_read_inputs@

#if $method.method == 'pp.binarize'
esc.pp.binarize(
    adata,
    copy=False)

#else if $method.method == 'pp.filter_cells'
esc.pp.filter_cells(
    adata,
    #if $method.filter.filter == 'min_counts'
    min_counts=$method.filter.min_counts,
    #else if $method.filter.filter == 'max_counts'
    max_counts=$method.filter.max_counts,
    #else if $method.filter.filter == 'min_features'
    min_features=$method.filter.min_features,
    #else if $method.filter.filter == 'max_features'
    max_features=$method.filter.max_features,
    #end if
    copy=False)

#else if $method.method == 'pp.filter_features'
esc.pp.filter_features(
    adata,
    #if $method.filter.filter == 'min_counts'
    min_counts=$method.filter.min_counts,
    #else if $method.filter.filter == 'max_counts'
    max_counts=$method.filter.max_counts,
    #else if $method.filter.filter == 'min_cells'
    min_cells=$method.filter.min_cells,
    #else if $method.filter.filter == 'max_cells'
    max_cells=$method.filter.max_cells,
    #end if
    copy=False)

#else if $method.method == 'nb_feat_log'
adata.obs['log_nb_features'] = [np.log10(x) for x in adata.obs['nb_features']]

#else if $method.method == 'pp.coverage_cells'
esc.pp.coverage_cells(
    adata,
    binary=$method.binary,
    log=$method.log,
    #if $method.threshold
    threshold=$method.threshold,
    #end if
    bins=$method.bins,
    save='plot.png'
)

#else if $method.method == 'pp.coverage_features'
esc.pp.coverage_features(
    adata,
    binary=$method.binary,
    log=$method.log,
    #if $method.threshold
    threshold=$method.threshold,
    #end if
    bins=$method.bins,
    save='plot.png'
)

#else if $method.method == 'pp.select_var_feature'
esc.pp.select_var_feature(
    adata,
    min_score=$method.min_score,
    show=False,
    #if $method.nb_features
    nb_features=$method.nb_features
    #end if
)

#else if $method.method == 'pp.cal_var'
esc.pp.cal_var(
    adata,
    show=True,
    color=['b', 'r'],
    save='plot.png'
)

#else if $method.method == 'pp.variability_features'
esc.pp.variability_features(
    adata,
    min_score=$method.min_score,
    nb_features=$method.nb_features,
    #if $method.log_mode
    log='$method.log_mode',
    #end if
    save='plot.png'
)

#else if $method.method == 'tl.find_genes'
esc.tl.find_genes(
    adata,
    gtf_file='$method.find_genes_gtf_file',
    key_added='$method.find_genes_key_added',
    upstream=$method.find_genes_upstream,
    feature_type='$method.find_genes_feature_type',
    annotation='$method.find_genes_annotation',
    raw=$method.find_genes_raw)
#end if

@CMD_anndata_write_outputs@
        ]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="inputs_anndata"/>
        <!-- One <when> per selectable method; the parameter names here are the ones read as $method.* in the script. -->
        <conditional name="method">
            <param argument="method" type="select" label="Method used for filtering">
                <option value="pp.binarize">Binarize count matrix, using 'pp.binarize'</option>
                <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of features expressed, using 'pp.filter_cells'</option>
                <option value="pp.filter_features">Filter features based on counts and numbers of features expressed, using 'pp.filter_features'</option>
                <option value="nb_feat_log">Compute log10 of nb_features</option>
                <option value="pp.coverage_cells">Coverage cells: Histogram of the number of open features (in the case of ATAC-seq data) per cell, using 'pp.coverage_cells'</option>
                <option value="pp.coverage_features">Coverage features: Distribution of the feature commonness in cells, using 'pp.coverage_features'</option>
                <option value="pp.select_var_feature">Select the most variable features, 'pp.select_var_feature'</option>
                <option value="pp.cal_var">Show distribution plots of cells sharing features and variability score 'pp.cal_var'</option>
                <option value="pp.variability_features">Computes variability score to rank the most variable features across all cells, using 'pp.variability_features'</option>
                <option value="tl.find_genes">Embedding: Find genes and add annotations, using 'tl.find_genes'</option>
            </param>
            <when value="pp.binarize" />
            <when value="pp.filter_cells">
                <!-- Only one threshold is passed to filter_cells per run. -->
                <conditional name="filter">
                    <param argument="filter" type="select" label="Filter" help="Filter mode">
                        <option value="min_counts">Minimum number of counts</option>
                        <option value="max_counts">Maximum number of counts</option>
                        <option value="min_features">Minimum number of features expressed</option>
                        <option value="max_features">Maximum number of features expressed</option>
                    </param>
                    <when value="min_counts">
                        <param argument="min_counts" type="integer" min="0" value="" label="Minimum counts" help="Minimum number of counts required for a cell to pass filtering"/>
                    </when>
                    <when value="max_counts">
                        <param argument="max_counts" type="integer" min="0" value="" label="Maximum counts" help="Maximum number of counts required for a cell to pass filtering"/>
                    </when>
                    <when value="min_features">
                        <param argument="min_features" type="integer" min="0" value="" label="Minimum features" help="Minimum number of features expressed required for a cell to pass filtering"/>
                    </when>
                    <when value="max_features">
                        <param argument="max_features" type="integer" min="0" value="" label="Maximum features" help="Maximum number of features expressed required for a cell to pass filtering"/>
                    </when>
                </conditional>
            </when>
            <when value="pp.filter_features">
                <!-- Only one threshold is passed to filter_features per run. -->
                <conditional name="filter">
                    <param argument="filter" type="select" label="Filter">
                        <option value="min_counts">Minimum number of counts</option>
                        <option value="max_counts">Maximum number of counts</option>
                        <option value="min_cells">Minimum number of cells expressed</option>
                        <option value="max_cells">Maximum number of cells expressed</option>
                    </param>
                    <when value="min_counts">
                        <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering"/>
                    </when>
                    <when value="max_counts">
                        <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering"/>
                    </when>
                    <when value="min_cells">
                        <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering"/>
                    </when>
                    <when value="max_cells">
                        <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering"/>
                    </when>
                </conditional>
            </when>
            <when value="nb_feat_log" />
            <when value="pp.coverage_cells">
                <expand macro="coverage_params" />
            </when>
            <when value="pp.coverage_features">
                <expand macro="coverage_params" />
            </when>
            <when value="pp.select_var_feature">
                <param argument="min_score" type="float" min="0" max="1" value="0.5" label="Min score" help="Minimum threshold variability score to retain features" />
                <param argument="nb_features" type="integer" min="0" value="" optional="True" label="Number of features" help="Default value is None. If specified, it will select the top most variable features. If this parameter is larger than the total number of features, it filters based on the min_score argument." />
            </when>
            <when value="pp.cal_var" />
            <when value="pp.variability_features">
                <param name="min_score" type="float" min="0" max="1" value="0.5" label="Minimum score value"/>
                <param name="nb_features" type="integer" min="0" value="" label="Number of features"/>
                <!-- Passed to the function as its "log" keyword; omitted from the call when left unset. -->
                <param name="log_mode" type="select" optional="True" label="Log" help="Log mode">
                    <option value="log2">Log2</option>
                    <option value="log10">Log10</option>
                </param>
            </when>
            <when value="tl.find_genes">
                <param name="find_genes_gtf_file" type="data" format="gtf" label="Annotation GTF file" help="(gtf_file)"/>
                <!-- Free-text values below are interpolated into quoted Python strings, hence the strict sanitizer and validator. -->
                <param name="find_genes_key_added" value="transcript_annotation" type="text" label="Key added" help="(key_added)">
                    <sanitizer invalid_char="">
                        <valid initial="string.letters,string.digits">
                            <add value="_" />
                            <add value="-" />
                            <add value="." />
                            <add value=" " />
                            <add value="," />
                        </valid>
                    </sanitizer>
                    <validator type="regex">[0-9a-zA-Z_., -]+</validator>
                </param>
                <param name="find_genes_upstream" value="2000" min="0" type="integer" label="Upstream" help="(upstream)"/>
                <param name="find_genes_feature_type" value="transcript" type="text" label="Feature type" help="(feature_type)">
                    <sanitizer invalid_char="">
                        <valid initial="string.letters,string.digits">
                            <add value="_" />
                            <add value="-" />
                            <add value="." />
                            <add value=" " />
                            <add value="," />
                        </valid>
                    </sanitizer>
                    <validator type="regex">[0-9a-zA-Z_., -]+</validator>
                </param>
                <param name="find_genes_annotation" value="HAVANA" type="text" label="Annotation" help="(annotation)">
                    <sanitizer invalid_char="">
                        <valid initial="string.letters,string.digits">
                            <add value="_" />
                            <add value="-" />
                            <add value="." />
                            <add value=" " />
                            <add value="," />
                        </valid>
                    </sanitizer>
                    <validator type="regex">[0-9a-zA-Z_., -]+</validator>
                </param>
                <param name="find_genes_raw" type="select" label="Raw?" help="(raw)">
                    <option value="True">True</option>
                    <option value="False" selected="true">False</option>
                </param>
            </when>
        </conditional>
        <expand macro="inputs_common_advanced"/>
    </inputs>
    <outputs>
        <expand macro="anndata_outputs"/>
        <!--
            plot.png is only written by the branches that pass save='plot.png'
            (pp.coverage_cells, pp.coverage_features, pp.cal_var, pp.variability_features);
            the filter hides this output for every other method.
        -->
        <data name="out_png" format="png" from_work_dir="plot.png" label="${tool.name} (${method.method}) plot on ${on_string}">
            <filter>method['method'] != 'pp.binarize' and method['method'] != 'pp.filter_cells' and method['method'] != 'pp.filter_features' and method['method'] != 'nb_feat_log' and method['method'] != 'pp.select_var_feature' and method['method'] !='tl.find_genes'</filter>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <!-- pp.binarize -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.binarize"/>
            </conditional>
            <output name="anndata_out" file="krumsiek11.pp.binarize.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <test expect_num_outputs="2">
            <!-- pp.filter_cells -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.filter_cells"/>
                <conditional name="filter">
                    <param name="filter" value="min_features"/>
                    <param name="min_features" value="10"/>
                </conditional>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <assert_stdout>
                <has_text_matching expression="395 × 11"/>
            </assert_stdout>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="esc.pp.filter_cells"/>
                    <has_text_matching expression="min_features=10"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="krumsiek11.pp.filter_cells.min_features.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <test expect_num_outputs="2">
            <!-- pp.filter_features -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.filter_features"/>
                <conditional name="filter">
                    <param name="filter" value="min_cells"/>
                    <param name="min_cells" value="600"/>
                </conditional>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <assert_stdout>
                <has_text_matching expression="640 × 2"/>
            </assert_stdout>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="esc.pp.filter_features"/>
                    <has_text_matching expression="min_cells=600"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="krumsiek11.pp.filter_features.min_cells.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <test expect_num_outputs="1">
            <!-- nb_feat_log -->
            <param name="adata" value="krumsiek11.pp.filter_cells.min_features.h5ad" />
            <conditional name="method">
                <param name="method" value="nb_feat_log"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="log_nb_features"/>
                <has_text_matching expression="nb_features"/>
            </assert_stdout>
        </test>
        <test expect_num_outputs="2">
            <!-- pp.select_var_feature -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.select_var_feature"/>
                <param name="min_score" value="0.6"/>
                <param name="nb_features" value="10"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <assert_stdout>
                <has_text_matching expression="prop_shared_cells"/>
                <has_text_matching expression="variability_score"/>
            </assert_stdout>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="esc.pp.select_var_feature"/>
                    <has_text_matching expression="adata"/>
                    <has_text_matching expression="min_score=0.6"/>
                    <has_text_matching expression="nb_features=10"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="krumsiek11.pp.select_var_feature.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <test expect_num_outputs="3">
            <!-- pp.cal_var -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.cal_var"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="esc.pp.cal_var"/>
                    <has_text_matching expression="adata"/>
                    <has_text_matching expression="plot.png"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="krumsiek11.pp.cal_var.h5ad" ftype="h5ad" compare="sim_size"/>
            <output name="out_png" file="krumsiek11.pp.cal_var.png" ftype="png" compare="sim_size"/>
        </test>
        <test expect_num_outputs="3">
            <!-- pp.coverage_cells -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.coverage_cells"/>
                <param name="threshold" value="3.0" />
                <param name="binary" value="True" />
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="esc.pp.coverage_cells"/>
                    <has_text_matching expression="adata"/>
                    <has_text_matching expression="threshold=3.0"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="krumsiek11.pp.coverage_cells.h5ad" ftype="h5ad" compare="sim_size"/>
            <output name="out_png" file="krumsiek11.pp.coverage_cells.png" ftype="png" compare="sim_size"/>
        </test>
        <test expect_num_outputs="3">
            <!-- pp.coverage_features -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.coverage_features"/>
                <param name="threshold" value="100" />
                <param name="binary" value="True" />
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="esc.pp.coverage_features"/>
                    <has_text_matching expression="adata"/>
                    <has_text_matching expression="threshold=100"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="krumsiek11.pp.coverage_features.h5ad" ftype="h5ad" compare="sim_size"/>
            <output name="out_png" file="krumsiek11.pp.coverage_features.png" ftype="png" compare="sim_size"/>
        </test>
        <test expect_num_outputs="3">
            <!-- pp.variability_features -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.variability_features"/>
                <param name="min_score" value="0.75" />
                <param name="nb_features" value="8" />
                <!-- The input is named log_mode; it is rendered as the log= keyword in the script. -->
                <param name="log_mode" value="log10" />
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="esc.pp.variability_features"/>
                    <has_text_matching expression="adata"/>
                    <has_text_matching expression="min_score=0.75"/>
                    <has_text_matching expression="nb_features=8"/>
                    <has_text_matching expression="log='log10'"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="krumsiek11.pp.variability_features.h5ad" ftype="h5ad" compare="sim_size"/>
            <output name="out_png" file="krumsiek11.pp.variability_features.png" ftype="png" compare="sim_size"/>
        </test>
        <test expect_num_outputs="2">
            <!-- tl.find_genes -->
            <param name="adata" value="chrY.h5ad" />
            <conditional name="method">
                <param name="method" value="tl.find_genes"/>
                <param name="find_genes_gtf_file" value="chrY.gtf"/>
                <param name="find_genes_key_added" value="transcript_annotation"/>
                <param name="find_genes_upstream" value="2000"/>
                <param name="find_genes_feature_type" value="transcript"/>
                <param name="find_genes_annotation" value="HAVANA"/>
                <param name="find_genes_raw" value="False"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="anndata_out" file="chrY_with_transcript_annotation.h5ad" ftype="h5ad" compare="sim_size">
                <assert_contents>
                    <has_h5_keys keys="var" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Convert the count matrix into a binary matrix (`pp.binarize`)
============================================================================================

Convert the count matrix into a binary matrix.

More details on the `episcanpy documentation
<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.binarize.html>`__

Filter cells outliers based on counts and numbers of features expressed (`pp.filter_cells`)
============================================================================================

For instance, only keep cells with at least *min_counts* counts or
*min_features* genes expressed. This is to filter measurement outliers, i.e.
"unreliable" observations.

Only provide one of the optional parameters *min_counts*, *min_features*,
*max_counts*, *max_features* per call.

More details on the `episcanpy documentation
<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.filter_cells.html>`__

Filter features based on number of cells or counts (`pp.filter_features`)
========================================================================================

Keep features that have at least *min_counts* counts or are expressed in at
least *min_cells* cells or have at most *max_counts* counts or are expressed
in at most *max_cells* cells.

Only provide one of the optional parameters *min_counts*, *min_cells*,
*max_counts*, *max_cells* per call.

More details on the `episcanpy documentation
<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.filter_features.html>`__

Histogram of the number of open features (`pp.coverage_cells`)
========================================================================================

Histogram of the number of open features (in the case of ATAC-seq data) per cell.

More details on the `episcanpy documentation
<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.coverage_cells.html>`__

Distribution of the feature commonness in cells (`pp.coverage_features`)
========================================================================================

Display how often a feature is measured as open (for ATAC-seq).
Distribution of the feature commonness in cells.

More details on the `episcanpy documentation
<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.coverage_features.html>`__

Selects the most variable features according to either a specified number of features or minimum variance score (`pp.select_var_feature`)
=========================================================================================================================================

This function computes a variability score to rank the most variable features across all cells.
Then it selects the most variable features according to either a specified number of features (nb_features) or a minimum variance score (min_score).

More details on the `episcanpy documentation
<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.select_var_feature.html>`__

Distribution of cells sharing features and variability score (`pp.cal_var`)
=============================================================================

Show distribution plots of cells sharing features and variability score.

More details on the `episcanpy documentation
<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.cal_var.html>`__

Compute a variability score to rank the most variable features across all cells (`pp.variability_features`)
============================================================================================================

This function computes a variability score to rank the most variable features across all cells.
Then it selects the most variable features according to either a specified number of features (nb_features) or a minimum variance score (min_score).

More details on the `episcanpy documentation
<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.variability_features.html>`__

Find and add gene annotations (`tl.find_genes`)
========================================================================================

This function adds a gene annotation to an AnnData (h5ad) file from annotations file (.annotation.gtf).
    ]]></help>
    <expand macro="citations"/>
</tool>
