scanpy_inspect: inspect.xml comparison

comparison inspect.xml @ 14:1c36180febfb draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 91121b1e72696f17478dae383badaa71e9f96dbb

author	iuc
date	Sat, 14 Sep 2024 12:40:40 +0000
parents	03ed427eb5e7
children	b6035c5e7e12

comparison

equal deleted inserted replaced

-:9329ecce0019
+:1c36180febfb
-<tool id="scanpy_inspect" name="Inspect and manipulate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">
+<tool id="scanpy_inspect" name="Scanpy Inspect and manipulate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
-<description> with scanpy</description>
 <macros>
 <import>macros.xml</import>
-<xml name="score_genes_params">
+<xml name="params_score_genes">
-<param argument="n_bins" type="integer" value="25" label="Number of expression level bins for sampling" help=""/>
+<param argument="n_bins" type="integer" value="25" label="Number of expression level bins for sampling"/>
-<param argument="random_state" type="integer" value="0" label="Random seed for sampling" help=""/>
+<param argument="random_state" type="integer" value="0" label="Random seed for sampling"/>
 <expand macro="param_use_raw"/>
 </xml>
-<token name="@CMD_score_genes_inputs@"><![CDATA[
+<token name="@CMD_PARAMS_SCORE_GENES@"><![CDATA[
 n_bins=$method.n_bins,
 random_state=$method.random_state,
 use_raw=$method.use_raw,
 copy=False
-]]></token>
+]]>
+</token>
 <xml name="corr_method">
 <param argument="corr_method" type="select" label="P-value correction method">
-<option value="benjamini-hochberg">Benjamini-Hochberg</option>
+<option value="benjamini-hochberg" selected="true">Benjamini-Hochberg</option>
 <option value="bonferroni">Bonferroni</option>
 </param>
 </xml>
 <xml name="fit_intercept">
-<param argument="fit_intercept" type="boolean" truevalue="True" falsevalue="False" checked="true"
+<param argument="fit_intercept" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Should a constant (a.k.a. bias or intercept) be added to the decision function?"/>
-label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help=""/>
 </xml>
 <xml name="max_iter">
-<param argument="max_iter" type="integer" min="0" value="100" label="Maximum number of iterations taken for the solvers to converge" help=""/>
+<param argument="max_iter" type="integer" min="0" value="100" label="Maximum number of iterations taken for the solvers to converge"/>
 </xml>
 <xml name="multi_class">
-<param argument="multi_class" type="select" label="Multi class" help="">
+<param argument="multi_class" type="select" label="Multi class">
+<option value="auto" selected="true">auto: selects ‘ovr’ if the data is binary and otherwise selects ‘multinomial’</option>
 <option value="ovr">ovr: a binary problem is fit for each label</option>
 <option value="multinomial">multinomial: the multinomial loss fit across the entire probability distribution, even when the data is binary</option>
-<option value="auto">auto: selects ‘ovr’ if the data is binary and otherwise selects ‘multinomial’</option>
 </param>
 </xml>
 <xml name="penalty">
-<param argument="penalty" type="select" label="Norm used in the penalization" help="">
+<param argument="penalty" type="select" label="Norm used in the penalization">
+<option value="l2" selected="true">l2</option>
 <option value="l1">l1</option>
-<option value="l2">l2</option>
+<yield/>
-<option value="customized">customized</option>
-</param>
-</xml>
-<xml name="custom_penalty">
-<param argument="pen" type="text" value="" label="Norm used in the penalization" help="">
-<expand macro="sanitize_query" />
 </param>
 </xml>
 <xml name="random_state">
-<param argument="random_state" type="integer" value="" optional="true"
+<param argument="random_state" type="integer" value="" optional="true" label="The seed of the pseudo random number generator to use when shuffling the data"/>
-label="The seed of the pseudo random number generator to use when shuffling the data" help=""/>
 </xml>
 </macros>
 <expand macro="bio_tools"/>
-<expand macro="requirements"/>
+<expand macro="requirements">
+<requirement type="package" version="1.5.1">scikit-learn</requirement>
+</expand>
 <expand macro="version_command"/>
 <command detect_errors="exit_code"><![CDATA[
 @CMD@
 ]]></command>
 <configfiles>
 <configfile name="script_file"><![CDATA[
-@CMD_imports@
+@CMD_IMPORTS@
-@CMD_read_inputs@
+@CMD_READ_INPUTS@
-#if $method.method == "pp.calculate_qc_metrics"
+## Branch for 'pp.calculate_qc_metrics': builds the sc.pp.calculate_qc_metrics call from the form values.
+#if str($method.method) == 'pp.calculate_qc_metrics':
 sc.pp.calculate_qc_metrics(
 adata=adata,
 expr_type='$method.expr_type',
 var_type='$method.var_type',
-#if $method.qc_vars
+## qc_vars is a comma-separated text field; it is split into a list of stripped strings and only passed when non-empty
+#if str($method.qc_vars) != '':
 #set $qc_vars = [str(x.strip()) for x in str($method.qc_vars).split(',')]
 qc_vars=$qc_vars,
 #end if
-#if $method.percent_top
+## percent_top is a comma-separated text field; each entry is converted to int and the list is only passed when non-empty
+#if str($method.percent_top) != '':
 #set $percent_top = [int(x.strip()) for x in str($method.percent_top).split(',')]
 percent_top=$percent_top,
 #end if
+## layer is only passed when the field is non-empty
+#if str($method.layer) != '':
+layer='$method.layer',
+#end if
+use_raw=$method.use_raw,
+log1p=$method.log1p,
 inplace=True)
-#else if $method.method == "tl.score_genes"
+#else if str($method.method) == 'pp.neighbors':
+## Branch for 'pp.neighbors': builds the sc.pp.neighbors call from the form values.
+sc.pp.neighbors(
+adata=adata,
+n_neighbors=$method.n_neighbors,
+## n_pcs, use_rep and key_added are only passed when their fields are non-empty
+#if str($method.n_pcs) != '':
+n_pcs=$method.n_pcs,
+#end if
+#if str($method.use_rep) != '':
+use_rep='$method.use_rep',
+#end if
+knn=$method.knn,
+## the form field is named pp_neighbors_method; its value is passed as the 'method' keyword
+method='$method.pp_neighbors_method',
+metric='$method.metric',
+random_state=$method.random_state,
+#if str($method.key_added) != '':
+key_added='$method.key_added',
+#end if
+copy=False)
+#else if str($method.method) == 'tl.score_genes':
 sc.tl.score_genes(
 adata=adata,
 #set $gene_list = [str(x.strip()) for x in str($method.gene_list).split(',')]
 gene_list=$gene_list,
 ctrl_size=$method.ctrl_size,
+#if str($method.gene_pool) != '':
+#set $gene_pool = [str(x.strip()) for x in str($method.gene_pool).split(',')]
+gene_pool=$gene_pool,
+#end if
 score_name='$method.score_name',
-#if $method.gene_pool
+@CMD_PARAMS_SCORE_GENES@)
-#set $gene_pool = [str(x.strip()) for x in $method.gene_pool.split(',')]
-gene_pool=$gene_pool,
+#else if str($method.method) == 'tl.score_genes_cell_cycle':
-#end if
+#if str($method.s_genes.format) == 'file':
-@CMD_score_genes_inputs@)
-#else if $method.method == "tl.score_genes_cell_cycle"
-#if str($method.s_genes.format) == 'file'
 with open('$method.s_genes.file', 'r') as s_genes_f:
 s_genes = [str(x.strip()) for x in s_genes_f.readlines()]
 print(s_genes)
 #end if
-#if str($method.g2m_genes.format) == 'file'
+#if str($method.g2m_genes.format) == 'file':
 with open('$method.g2m_genes.file', 'r') as g2m_genes_f:
 g2m_genes = [str(x.strip()) for x in g2m_genes_f.readlines()]
 print(g2m_genes)
 #end if
 sc.tl.score_genes_cell_cycle(
 adata=adata,
-#if str($method.s_genes.format) == 'text'
+#if str($method.s_genes.format) == 'text':
-#set $s_genes = [str(x.strip()) for x in $method.s_genes.text.split(',')]
+#set $s_genes = [str(x.strip()) for x in str($method.s_genes.text).split(',')]
 s_genes=$s_genes,
-#else if str($method.s_genes.format) == 'file'
+#else if str($method.s_genes.format) == 'file':
 s_genes=s_genes,
 #end if
-#if str($method.g2m_genes.format) == 'text'
+#if str($method.g2m_genes.format) == 'text':
-#set $g2m_genes = [str(x.strip()) for x in $method.g2m_genes.text.split(',')]
+#set $g2m_genes = [str(x.strip()) for x in str($method.g2m_genes.text).split(',')]
 g2m_genes=$g2m_genes,
-#else if str($method.g2m_genes.format) == 'file'
+#else if str($method.g2m_genes.format) == 'file':
 g2m_genes=g2m_genes,
 #end if
-@CMD_score_genes_inputs@)
+@CMD_PARAMS_SCORE_GENES@)
-#else if $method.method == 'pp.neighbors'
+#else if str($method.method) == 'tl.rank_genes_groups':
-sc.pp.neighbors(
-adata=adata,
-n_neighbors=$method.n_neighbors,
-#if str($method.n_pcs) != ''
-n_pcs=$method.n_pcs,
-#end if
-#if $method.use_rep
-use_rep='$method.use_rep',
-#end if
-knn=$method.knn,
-random_state=$method.random_state,
-method='$method.pp_neighbors_method',
-metric='$method.metric',
-copy=False)
-#else if $method.method == 'tl.rank_genes_groups'
 sc.tl.rank_genes_groups(
 adata=adata,
+#if str($method.groupby) != '':
 groupby='$method.groupby',
-#if $method.groups
+#end if
+use_raw=$method.use_raw,
+## groups is a comma-separated text field; it is split into a list and only passed when non-empty
+#if str($method.groups) != '':
 #set $group=[x.strip() for x in str($method.groups).split(',')]
-groups=$group,
+## $group renders as a Python list literal, so it must be emitted unquoted (quoting it breaks the generated script)
+groups=$group,
 #end if
-#if $method.ref.rest == 'rest'
+#if str($method.layer) != '':
+layer='$method.layer',
+#end if
+#if str($method.ref.rest) == 'rest':
 reference='$method.ref.rest',
 #else
 reference='$method.ref.reference',
 #end if
+#if str($method.n_genes) != '':
 n_genes=$method.n_genes,
+#end if
 method='$method.tl_rank_genes_groups_method.method',
-#if $method.tl_rank_genes_groups_method.method != 'logreg'
+#if str($method.tl_rank_genes_groups_method.method) != 'logreg':
 corr_method='$method.tl_rank_genes_groups_method.corr_method',
-#else
+#end if
+#if str($method.tl_rank_genes_groups_method.method) == 'wilcoxon':
+tie_correct=$method.tl_rank_genes_groups_method.tie_correct,
+#end if
+#if str($method.tl_rank_genes_groups_method.method) == 'logreg':
 solver='$method.tl_rank_genes_groups_method.solver.solver',
-#if $method.tl_rank_genes_groups_method.solver.solver == 'newton-cg'
+#if str($method.tl_rank_genes_groups_method.solver.solver) == 'lbfgs':
-penalty='l2',
+## the penalty checkbox yields the text 'l2' or 'None'; the unchecked case is emitted as the Python None object, not the string 'None'
+#if str($method.tl_rank_genes_groups_method.solver.penalty) == 'l2':
+penalty='l2',
+#else:
+penalty=None,
+#end if
 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
-#else if $method.tl_rank_genes_groups_method.solver.solver == 'lbfgs'
+#else if str($method.tl_rank_genes_groups_method.solver.solver) == 'newton-cg':
-penalty='l2',
+## same handling as for lbfgs: 'l2' when checked, the None object otherwise
+#if str($method.tl_rank_genes_groups_method.solver.penalty) == 'l2':
+penalty='l2',
+#else:
+penalty=None,
+#end if
 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
-#else if $method.tl_rank_genes_groups_method.solver.solver == 'liblinear'
+#else if str($method.tl_rank_genes_groups_method.solver.solver) == 'liblinear':
-#if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1'
+#if str($method.tl_rank_genes_groups_method.solver.penalty.penalty) == 'l1':
 penalty='l1',
-#else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2'
+#else:
 penalty='l2',
 dual=$method.tl_rank_genes_groups_method.solver.penalty.dual,
-#else
-penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen',
 #end if
 fit_intercept=$method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept,
-#if $method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept == 'True'
+#if str($method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept) == 'True':
 intercept_scaling=$method.tl_rank_genes_groups_method.solver.intercept_scaling.intercept_scaling,
 #end if
-#if str($method.tl_rank_genes_groups_method.solver.random_state) != ''
+#if str($method.tl_rank_genes_groups_method.solver.random_state) != '':
 random_state=$method.tl_rank_genes_groups_method.solver.random_state,
 #end if
-#else if $method.tl_rank_genes_groups_method.solver.solver == 'sag'
+#else if str($method.tl_rank_genes_groups_method.solver.solver) == 'sag':
-penalty='l2',
+## for 'sag' the penalty input is a plain checkbox (solver.penalty), not a nested conditional;
+## it yields the text 'l2' or 'None', and the unchecked case is emitted as the Python None object
+#if str($method.tl_rank_genes_groups_method.solver.penalty) == 'l2':
+penalty='l2',
+#else:
+penalty=None,
+#end if
 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
-#if str($method.tl_rank_genes_groups_method.solver.random_state) != ''
+## random_state is only passed when the field is non-empty
+#if str($method.tl_rank_genes_groups_method.solver.random_state) != '':
 random_state=$method.tl_rank_genes_groups_method.solver.random_state,
 #end if
 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
-multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
+## multi_class is a select with text values (auto, ovr, multinomial) and must be emitted as a quoted string
+multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
-#else if $method.tl_rank_genes_groups_method.solver.solver == 'saga'
+#else if str($method.tl_rank_genes_groups_method.solver.solver) == 'saga':
-#if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1'
+## for 'saga' the penalty is chosen in a nested conditional (solver.penalty.penalty): l1, l2, elasticnet or None
+#if str($method.tl_rank_genes_groups_method.solver.penalty.penalty) == 'l1':
 penalty='l1',
-#else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2'
+#else if str($method.tl_rank_genes_groups_method.solver.penalty.penalty) == 'l2':
 penalty='l2',
-#else
+#else if str($method.tl_rank_genes_groups_method.solver.penalty.penalty) == 'elasticnet':
-penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen',
+penalty='elasticnet',
+#else:
+## the remaining option is 'None': emit the Python None object, not the string 'None'
+penalty=None,
 #end if
 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
-multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
+## multi_class is a select with text values (auto, ovr, multinomial) and must be emitted as a quoted string
+multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
 #end if
 tol=$method.tl_rank_genes_groups_method.tol,
 C=$method.tl_rank_genes_groups_method.c,
 #end if
-use_raw=$method.use_raw)
+#if str($method.key_added) != '':
+key_added='$method.key_added',
-#else if $method.method == "tl.marker_gene_overlap"
+#end if
+copy=False)
+#else if str($method.method) == "tl.marker_gene_overlap":
 reference_markers = {}
 #for $i, $s in enumerate($method.reference_markers)
 #set $list=[x.strip() for x in str($s.values).split(',')]
 reference_markers['$s.key'] = $list
 #end for
-sc.tl.marker_gene_overlap(
+# Temporary fix for the issue with "inplace=True" for Pandas dataframes.
-adata,
+# see here: https://github.com/scverse/scanpy/blob/b6193502e11b84fc1b4a011ee9cf08a19da22ebf/src/scanpy/tools/_marker_gene_overlap.py#L167
-reference_markers,
+marker_overlap_result = sc.tl.marker_gene_overlap(
-#if $method.key
+adata,
-key='$method.key',
+reference_markers,
-#end if
+#if str($method.key) != '':
-method='$method.overlap.method',
+key='$method.key',
-#if $method.overlap.method == 'overlap_count' and str($method.overlap.normalize) != 'None'
+#end if
-normalize='$method.overlap.normalize',
+method='$method.overlap.method',
-#end if
+#if str($method.overlap.method) == 'overlap_count' and str($method.overlap.normalize) != 'None':
-#if str($method.top_n_markers) != ''
+normalize='$method.overlap.normalize',
-top_n_markers=$method.top_n_markers,
+#end if
-#end if
+#if str($method.top_n_markers) != '':
-#if str($method.adj_pval_threshold) != ''
+top_n_markers=$method.top_n_markers,
-adj_pval_threshold=$method.adj_pval_threshold,
+#end if
-#end if
+#if str($method.adj_pval_threshold) != '':
-#if $method.key_added
+adj_pval_threshold=$method.adj_pval_threshold,
-key_added='$method.key_added',
+#end if
-#end if
+#if str($method.key_added) != '':
-inplace=True)
+key_added='$method.key_added',
+#end if
-#else if $method.method == "pp.log1p"
+inplace=False)
+## The call runs with inplace=False, so the script stores the returned result in adata.uns itself.
+## Honour a user-supplied key_added; otherwise fall back to the key 'marker_gene_overlap'.
+## NOTE(review): the key_added input definition is outside the visible part of the file - confirm it is a text field.
+#if str($method.key_added) != '':
+adata.uns['$method.key_added'] = marker_overlap_result
+#else:
+adata.uns['marker_gene_overlap'] = marker_overlap_result
+#end if
+#else if str($method.method) == "pp.log1p":
 sc.pp.log1p(
 adata,
+#if str($method.base) != '':
+base=$method.base,
+#end if
+#if str($method.layer) != '':
+layer='$method.layer',
+#end if
+#if str($method.obsm) != '':
+obsm='$method.obsm',
+#end if
 copy=False)
-#else if $method.method == "pp.scale"
+#else if str($method.method) == "pp.scale":
 sc.pp.scale(
 adata,
 zero_center=$method.zero_center,
-#if str($method.max_value) != ''
+#if str($method.max_value) != '':
 max_value=$method.max_value,
 #end if
+#if str($method.layer) != '':
+layer='$method.layer',
+#end if
+#if str($method.obsm) != '':
+obsm='$method.obsm',
+#end if
+#if str($method.mask_obs) != '':
+mask_obs='$method.mask_obs',
+#end if
 copy=False)
-#else if $method.method == "pp.sqrt"
+#else if str($method.method) == "pp.sqrt":
+## Print summary statistics of adata.X before and after the transformation; both prints belong to the 'pp.sqrt' branch only.
+print("stats before sqrt:", "min=", adata.X.min(), "max=", adata.X.max(), "mean=", adata.X.mean())
 sc.pp.sqrt(
 adata,
 copy=False)
+## keep this print before the closing end-if so it is not emitted for the other methods
+print("stats after sqrt:", "min=", adata.X.min(), "max=", adata.X.max(), "mean=", adata.X.mean())
 #end if
-@CMD_anndata_write_outputs@
-]]></configfile>
+@CMD_ANNDATA_WRITE_OUTPUTS@
+]]>
+</configfile>
 </configfiles>
 <inputs>
 <expand macro="inputs_anndata"/>
 <conditional name="method">
 <param argument="method" type="select" label="Method used for inspecting">
 <option value="pp.calculate_qc_metrics">Calculate quality control metrics, using 'pp.calculate_qc_metrics'</option>
 <option value="pp.neighbors">Compute a neighborhood graph of observations, using 'pp.neighbors'</option>
 <option value="tl.score_genes">Score a set of genes, using 'tl.score_genes'</option>
 <option value="tl.score_genes_cell_cycle">Score cell cycle genes, using 'tl.score_genes_cell_cycle'</option>
 <option value="tl.rank_genes_groups">Rank genes for characterizing groups, using 'tl.rank_genes_groups'</option>
-<!--<option value="tl.marker_gene_overlap">Calculate an overlap score between data-deriven marker genes and provided markers, using 'tl.marker_gene_overlap'</option>-->
+<option value="tl.marker_gene_overlap">Calculate an overlap score between data-derived marker genes and provided markers, using 'tl.marker_gene_overlap'</option>
 <option value="pp.log1p">Logarithmize the data matrix, using 'pp.log1p'</option>
 <option value="pp.scale">Scale data to unit variance and zero mean, using 'pp.scale'</option>
 <option value="pp.sqrt">Square root the data matrix, using 'pp.sqrt'</option>
 </param>
 <when value="pp.calculate_qc_metrics">
 <param argument="expr_type" type="text" value="counts" label="Name of kind of values in X">
-<expand macro="sanitize_query" />
+<expand macro="sanitize_query"/>
 </param>
 <param argument="var_type" type="text" value="genes" label="The kind of thing the variables are">
-<expand macro="sanitize_query" />
+<expand macro="sanitize_query"/>
 </param>
-<param argument="qc_vars" type="text" value="" label="Keys for boolean columns of '.var' which identify variables you could want to control for" help="Keys separated by a comma">
+<param argument="qc_vars" type="text" optional="true" value="" label="Keys for boolean columns of '.var' which identify variables you could want to control for" help="Keys separated by a comma">
-<expand macro="sanitize_query" />
+<expand macro="sanitize_query"/>
 </param>
-<param argument="percent_top" type="text" value="" label="Proportions of top genes to cover"
+<param argument="percent_top" type="text" value="" optional="true" label="Proportions of top genes to cover"
 help=" Values (integers) are considered 1-indexed, '50' finds cumulative proportion to the 50th most expressed genes. Values separated by a comma. If empty don't calculate">
-<expand macro="sanitize_vectors" />
+<expand macro="sanitize_vectors"/>
 </param>
+<expand macro="param_layer"/>
+<expand macro="param_use_raw"/>
+<param argument="log1p" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Compute log1p transformed annotations"/>
 </when>
 <when value="pp.neighbors">
 <param argument="n_neighbors" type="integer" min="0" value="15" label="The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation" help="Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. If 'knn' is 'True', number of nearest neighbors to be searched. If 'knn' is 'False', a Gaussian kernel width is set to the distance of the 'n_neighbors' neighbor."/>
-<param argument="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use" help=""/>
+<param argument="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use"/>
-<param argument="use_rep" type="text" value="" optional="true" label="Indicated representation to use" help="If not set, the representation is chosen automatically: for n_vars below 50, X is used, otherwise X_pca (uns) is used. If X_pca is not present, it's computed with default parameter">
+<expand macro="param_use_rep"/>
-<expand macro="sanitize_query" />
-</param>
 <param argument="knn" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use a hard threshold to restrict the number of neighbors to n_neighbors?" help="If true, it considers a knn graph. Otherwise, it uses a Gaussian Kernel to assign low weights to neighbors more distant than the 'n_neighbors' nearest neighbor."/>
-<param argument="random_state" type="integer" value="0" label="Numpy random seed" help=""/>
+<param name="pp_neighbors_method" argument="method" type="select" label="Method for computing connectivities">
-<param name="pp_neighbors_method" argument="method" type="select" label="Method for computing connectivities" help="">
+<option value="umap" selected="true">umap (McInnes et al, 2018)</option>
-<option value="umap">umap (McInnes et al, 2018)</option>
 <option value="gauss">gauss: Gauss kernel following (Coifman et al 2005) with adaptive width (Haghverdi et al 2016)</option>
 </param>
-<param argument="metric" type="select" label="Distance metric" help="">
+<param argument="metric" type="select" label="Distance metric">
 <expand macro="distance_metric_options"/>
 </param>
+<param argument="random_state" type="integer" value="0" label="Numpy random seed"/>
+<param argument="key_added" type="text" value="" optional="true" label="Key to store neighbors, distances and connectivities" help="If specified, the neighbors data is added to .uns[key_added], distances are stored in .obsp[key_added+'_distances'] and connectivities in .obsp[key_added+'_connectivities']"/>
 </when>
 <when value="tl.score_genes">
-<param argument="gene_list" type="text" value="" label="The list of gene names used for score calculation" help="Genes separated by a comma">
+<param argument="gene_list" type="text" value="" optional="false" label="The list of gene names used for score calculation" help="Genes separated by a comma">
-<expand macro="sanitize_query" />
+<expand macro="sanitize_query"/>
 </param>
-<param argument="ctrl_size" type="integer" value="50" label="Number of reference genes to be sampled"
+<param argument="ctrl_size" type="integer" value="50" label="Number of reference genes to be sampled" help="If 'len(gene_list)' is not too low, you can set 'ctrl_size=len(gene_list)'."/>
-help="If 'len(gene_list)' is not too low, you can set 'ctrl_size=len(gene_list)'."/>
+<param argument="gene_pool" type="text" value="" optional="true" label="Genes for sampling the reference set" help="Default is all genes. Genes separated by a comma">
-<param argument="gene_pool" type="text" value="" optional="true" label="Genes for sampling the reference set"
+<expand macro="sanitize_query"/>
-help="Default is all genes. Genes separated by a comma">
+</param>
-<expand macro="sanitize_query" />
+<param argument="score_name" type="text" value="score" label="Name of the field to be added in '.obs'">
-</param>
+<expand macro="sanitize_query"/>
-<expand macro="score_genes_params"/>
+</param>
-<param argument="score_name" type="text" value="score" label="Name of the field to be added in '.obs'" help="">
+<expand macro="params_score_genes"/>
-<expand macro="sanitize_query" />
-</param>
 </when>
 <when value="tl.score_genes_cell_cycle">
 <conditional name='s_genes'>
 <param name="format" type="select" label="Format for the list of genes associated with S phase">
+<option value="text" selected="true">Text</option>
 <option value="file">File</option>
-<option value="text" selected="true">Text</option>
 </param>
 <when value="text">
 <param name="text" type="text" value="" label="List of genes associated with S phase" help="Genes separated by a comma">
-<expand macro="sanitize_query" />
+<expand macro="sanitize_query"/>
 </param>
 </when>
 <when value="file">
 <param name="file" type="data" format="txt" label="File with the list of genes associated with S phase" help="One gene per line"/>
 </when>
 </conditional>
 <conditional name='g2m_genes'>
 <param name="format" type="select" label="Format for the list of genes associated with G2M phase">
+<option value="text" selected="true">Text</option>
 <option value="file">File</option>
-<option value="text" selected="true">Text</option>
 </param>
 <when value="text">
 <param name="text" type="text" value="" label="List of genes associated with G2M phase" help="Genes separated by a comma">
-<expand macro="sanitize_query" />
+<expand macro="sanitize_query"/>
 </param>
 </when>
 <when value="file">
 <param name="file" type="data" format="txt" label="File with the list of genes associated with G2M phase" help="One gene per line"/>
 </when>
 </conditional>
-<expand macro="score_genes_params"/>
+<expand macro="params_score_genes"/>
 </when>
 <when value="tl.rank_genes_groups">
-<param argument="groupby" type="text" value="" label="The key of the observations grouping to consider" help="">
+<param argument="groupby" type="text" value="" label="The key of the observations grouping to consider">
-<expand macro="sanitize_query" />
+<expand macro="sanitize_query"/>
 </param>
 <expand macro="param_use_raw"/>
 <param argument="groups" type="text" value="" label="Subset of groups to which comparison shall be restricted" help="e.g. ['g1', 'g2', 'g3']. If not passed, a ranking will be generated for all groups.">
-<expand macro="sanitize_query" />
+<expand macro="sanitize_query"/>
+</param>
+<param argument="layer" type="text" value="" label="Key from adata.layers whose value will be used to perform tests on">
+<expand macro="sanitize_query"/>
 </param>
 <conditional name="ref">
 <param name="rest" type="select" label="Comparison">
-<option value="rest">Compare each group to the union of the rest of the group</option>
+<option value="rest" selected="true">Compare each group to the union of the rest of the group</option>
 <option value="group_id">Compare with respect to a specific group</option>
 </param>
 <when value="rest"/>
 <when value="group_id">
 <param argument="reference" type="text" value="" label="Group identifier with respect to which compare">
-<expand macro="sanitize_query" />
+<expand macro="sanitize_query"/>
 </param>
 </when>
 </conditional>
-<param argument="n_genes" type="integer" min="0" value="100" label="The number of genes that appear in the returned tables" help=""/>
+<param argument="n_genes" type="integer" min="0" value="" optional="true" label="The number of genes that appear in the returned tables" help="Defaults to all genes"/>
 <conditional name="tl_rank_genes_groups_method">
 <param argument="method" type="select" label="Method">
 <option value="t-test" selected="true">t-test</option>
 <option value="wilcoxon">Wilcoxon-Rank-Sum</option>
 <option value="t-test_overestim_var">t-test with overestimate of variance of each group</option>
 <when value="t-test">
 <expand macro="corr_method"/>
 </when>
 <when value="wilcoxon">
 <expand macro="corr_method"/>
+<param argument="tie_correct" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Use tie correction for 'wilcoxon' scores"/>
 </when>
 <when value="t-test_overestim_var">
 <expand macro="corr_method"/>
 </when>
 <when value="logreg">
 <conditional name="solver">
 <param argument="solver" type="select" label="Algorithm to use in the optimization problem" help="For small datasets, ‘liblinear’ is a good choice, whereas ‘sag’ and ‘saga’ are faster for large ones. For multiclass problems, only ‘newton-cg’, ‘sag’, ‘saga’ and ‘lbfgs’ handle multinomial loss; ‘liblinear’ is limited to one-versus-rest schemes. ‘newton-cg’, ‘lbfgs’ and ‘sag’ only handle L2 penalty, whereas ‘liblinear’ and ‘saga’ handle L1 penalty.">
+<option value="lbfgs" selected="true">lbfgs</option>
 <option value="newton-cg">newton-cg</option>
-<option value="lbfgs">lbfgs</option>
 <option value="liblinear">liblinear</option>
 <option value="sag">sag</option>
 <option value="saga">saga</option>
 </param>
-<when value="newton-cg">
+<when value="lbfgs">
+<param name="penalty" type="boolean" truevalue="l2" falsevalue="None" checked="true" label="use l2 penalty?"/>
 <expand macro="fit_intercept"/>
 <expand macro="max_iter"/>
 <expand macro="multi_class"/>
 </when>
-<when value="lbfgs">
+<when value="newton-cg">
+<param name="penalty" type="boolean" truevalue="l2" falsevalue="None" checked="true" label="use l2 penalty?"/>
 <expand macro="fit_intercept"/>
 <expand macro="max_iter"/>
 <expand macro="multi_class"/>
 </when>
 <when value="liblinear">
+<!-- Nested conditional read by the script as solver.penalty.penalty; the 'penalty' macro supplies its l2/l1 test select. -->
 <conditional name="penalty">
 <expand macro="penalty"/>
 <when value="l1"/>
 <when value="l2">
 <param argument="dual" type="boolean" truevalue="True" falsevalue="False" checked="false"
 label="Dual (not primal) formulation?" help="Prefer primal when n_samples > n_features"/>
 </when>
-<when value="customized">
-<expand macro="custom_penalty"/>
-</when>
 </conditional>
 <conditional name="intercept_scaling">
-<param argument="fit_intercept" type="select"
+<param argument="fit_intercept" type="select" label="Should a constant (a.k.a. bias or intercept) be added to the decision function?">
-label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help="">
+<option value="True" selected="true">Yes</option>
-<option value="True">Yes</option>
 <option value="False">No</option>
 </param>
 <when value="True">
-<param argument="intercept_scaling" type="float" value="1.0"
+<param argument="intercept_scaling" type="float" value="1.0" label="Intercept scaling" help="x becomes [x, self.intercept_scaling], i.e. a 'synthetic' feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic_feature_weight."/>
-label="Intercept scaling"
-help="x becomes [x, self.intercept_scaling], i.e. a 'synthetic' feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic_feature_weight."/>
 </when>
 <when value="False"/>
 </conditional>
 <expand macro="random_state"/>
 </when>
 <when value="sag">
+<param name="penalty" type="boolean" truevalue="l2" falsevalue="None" checked="true" label="use l2 penalty?"/>
 <expand macro="fit_intercept"/>
 <expand macro="random_state"/>
 <expand macro="max_iter"/>
 <expand macro="multi_class"/>
 </when>
 <when value="saga">
 <conditional name="penalty">
-<expand macro="penalty"/>
+<expand macro="penalty">
+<option value="elasticnet">elasticnet</option>
+<option value="None">None</option>
+</expand>
 <when value="l1"/>
 <when value="l2"/>
-<when value="customized">
+<when value="elasticnet"/>
-<expand macro="custom_penalty"/>
+<when value="None"/>
-</when>
 </conditional>
 <expand macro="fit_intercept"/>
 <expand macro="multi_class"/>
 </when>
 </conditional>
-<param argument="tol" type="float" value="1e-4" label="Tolerance for stopping criteria" help=""/>
+<param argument="tol" type="float" value="1e-4" label="Tolerance for stopping criteria"/>
-<param argument="c" type="float" value="1.0" label="Inverse of regularization strength"
+<param argument="c" type="float" value="1.0" label="Inverse of regularization strength" help="It must be a positive float. Like in support vector machines, smaller values specify stronger regularization."/>
-help="It must be a positive float. Like in support vector machines, smaller values specify stronger regularization."/>
 </when>
 </conditional>
+<param argument="key_added" type="text" value="" optional="true" label="The key in adata.uns information is saved to"/>
 </when>
 <!-- With inplace=True, NotImplementedError: Writing Pandas dataframes to h5ad is currently under development. Please use `inplace=False`. -->
-<!-- <when value="tl.marker_gene_overlap">
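+<!-- Worked around in the config script: marker_gene_overlap is called with inplace=False and the script stores the returned result in adata.uns itself. -->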
+<when value="tl.marker_gene_overlap">
 <repeat name="reference_markers" title="Marker genes">
-<param name="key" type="text" value="" label="Cell identity name" help=""/>
+<param name="key" type="text" value="" label="Cell identity name"/>
 <param name="values" type="text" value="" label="List of genes" help="Comma-separated names from 'var'"/>
 </repeat>
 <param argument="key" type="text" value="rank_genes_groups" label="Key in adata.uns where the rank_genes_groups output is stored"/>
 <conditional name="overlap">
 <param argument="method" type="select" label="Method to calculate marker gene overlap">
-<option value="overlap_count">overlap_count: Intersection of the gene set</option>
+<option value="overlap_count" selected="true">overlap_count: Intersection of the gene set</option>
 <option value="overlap_coef">overlap_coef: Overlap coefficient</option>
 <option value="jaccard">jaccard: Jaccard index</option>
 </param>
 <when value="overlap_count">
 <param argument="normalize" type="select" label="Normalization option for the marker gene overlap output">
-<option value="None">None</option>
+<option value="None" selected="true">None</option>
 <option value="reference">reference: Normalization of the data by the total number of marker genes given in the reference annotation per group</option>
 <option value="data">data: Normalization of the data by the total number of marker genes used for each cluster</option>
 </param>
 </when>
 <when value="overlap_coef"/>
 <when value="jaccard"/>
 </conditional>
-<param argument="top_n_markers" type="integer" optional="true" label="Number of top data-derived marker genes to use" help="By default all calculated marker genes are used. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/>
+<param argument="top_n_markers" type="integer" optional="true" value="" label="Number of top data-derived marker genes to use" help="By default all calculated marker genes are used. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/>
-<param argument="adj_pval_threshold" type="float" optional="true" label="Significance threshold on the adjusted p-values to select marker genes" help=" This can only be used when adjusted p-values are calculated by 'tl.rank_genes_groups'. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/>
+<param argument="adj_pval_threshold" type="float" optional="true" value="" label="Significance threshold on the adjusted p-values to select marker genes" help=" This can only be used when adjusted p-values are calculated by 'tl.rank_genes_groups'. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/>
-<param argument="key_added" type="text" value="marker_gene_overlap" optional="true" label="Key that will contain the marker overlap scores in 'uns'"/>
+<param argument="key_added" type="text" optional="true" value="" label="Key that will contain the marker overlap scores in 'uns'"/>
-</when>-->
+</when>
-<when value="pp.log1p"/>
+<when value="pp.log1p">
+<param argument="base" type="integer" value="" optional="true" label="Base of the logarithm." help="Natural logarithm is used by default."/>
+<param argument="layer" type="text" value="" optional="true" label="Entry of layers to transform">
+<expand macro="sanitize_query"/>
+</param>
+<param argument="obsm" type="text" value="" optional="true" label="Entry of obsm to transform">
+<expand macro="sanitize_query"/>
+</param>
+</when>
 <when value="pp.scale">
-<param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true"
+<param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/>
-label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/>
+<param argument="max_value" type="float" value="" optional="true" label="Maximum value" help="Clip (truncate) to this value after scaling. If not set, it does not clip."/>
-<param argument="max_value" type="float" value="" optional="true" label="Maximum value"
+<param argument="layer" type="text" value="" label="Which element of layers to scale">
-help="Clip (truncate) to this value after scaling. If not set, it does not clip."/>
+<expand macro="sanitize_query"/>
+</param>
+<param argument="obsm" type="text" value="" label="Which element of obsm to scale">
+<expand macro="sanitize_query"/>
+</param>
+<param argument="mask_obs" type="text" value="" label="Restrict both the derivation of scaling parameters and the scaling itself to a certain set of observations.">
+<expand macro="sanitize_query"/>
+</param>
 </when>
 <when value="pp.sqrt"/>
 </conditional>
 <expand macro="inputs_common_advanced"/>
 </inputs>
 <outputs>
 <expand macro="anndata_outputs"/>
 </outputs>
 <tests>
-<test expect_num_outputs="2">
 <!-- test 1 -->
-<param name="adata" value="sparce_csr_matrix.h5ad" />
+<test expect_num_outputs="2">
+<param name="adata" value="sparce_csr_matrix.h5ad"/>
 <conditional name="method">
 <param name="method" value="pp.calculate_qc_metrics"/>
-<param name="expr_type" value="counts"/>
-<param name="var_type" value="genes"/>
 <param name="qc_vars" value="mito,negative"/>
-<param name="percent_top" value=""/>
+</conditional>
+<section name="advanced_common">
+<param name="show_log" value="true"/>
+</section>
+<output name="hidden_output">
+<assert_contents>
+<has_text_matching expression="sc.pp.calculate_qc_metrics"/>
+<has_text_matching expression="expr_type='counts'"/>
+<has_text_matching expression="var_type='genes'"/>
+<has_text_matching expression="qc_vars=\['mito', 'negative'\]"/>
+</assert_contents>
+</output>
+<output name="anndata_out" ftype="h5ad">
+<assert_contents>
+<has_h5_keys keys="obs/n_genes_by_counts,obs/log1p_n_genes_by_counts,obs/total_counts,obs/log1p_total_counts,obs/pct_counts_in_top_50_genes,obs/pct_counts_in_top_100_genes,obs/pct_counts_in_top_200_genes,obs/pct_counts_in_top_500_genes,obs/total_counts_mito,obs/log1p_total_counts_mito,obs/pct_counts_mito,obs/total_counts_negative,obs/log1p_total_counts_negative,obs/pct_counts_negative"/>
+<has_h5_keys keys="var/n_cells_by_counts,var/mean_counts,var/log1p_mean_counts,var/pct_dropout_by_counts,var/total_counts,var/log1p_total_counts"/>
+</assert_contents>
+</output>
+</test>
+<!-- test 2 -->
+<test expect_num_outputs="2">
+<param name="adata" value="sparce_csr_matrix.h5ad"/>
+<conditional name="method">
+<param name="method" value="pp.calculate_qc_metrics"/>
+<param name="qc_vars" value="mito,negative"/>
+<param name="percent_top" value="50,100,200,300"/>
 </conditional>
 <section name="advanced_common">
 <param name="show_log" value="true" />
 </section>
 <output name="hidden_output">
 <assert_contents>
 <has_text_matching expression="sc.pp.calculate_qc_metrics" />
 <has_text_matching expression="expr_type='counts'" />
 <has_text_matching expression="var_type='genes'" />
 <has_text_matching expression="qc_vars=\['mito', 'negative'\]" />
-</assert_contents>
+<has_text_matching expression="percent_top=\[50, 100, 200, 300\]" />
-</output>
+</assert_contents>
-<output name="anndata_out" file="pp.calculate_qc_metrics.sparce_csr_matrix.h5ad" ftype="h5ad" compare="sim_size"/>
+</output>
-</test>
+<output name="anndata_out" ftype="h5ad">
-<test expect_num_outputs="2">
+<assert_contents>
-<!-- test 2 -->
+<has_h5_keys keys="obs/n_genes_by_counts,obs/log1p_n_genes_by_counts,obs/total_counts,obs/log1p_total_counts,obs/pct_counts_in_top_50_genes,obs/pct_counts_in_top_100_genes,obs/pct_counts_in_top_200_genes,obs/pct_counts_in_top_300_genes,obs/total_counts_mito,obs/log1p_total_counts_mito,obs/pct_counts_mito,obs/total_counts_negative,obs/log1p_total_counts_negative,obs/pct_counts_negative"/>
-<param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" />
+<has_h5_keys keys="var/mito,var/negative,var/n_cells_by_counts,var/mean_counts,var/log1p_mean_counts,var/pct_dropout_by_counts,var/total_counts,var/log1p_total_counts"/>
+</assert_contents>
+</output>
+</test>
+<!-- test 3 -->
+<test expect_num_outputs="2">
+<param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad"/>
 <conditional name="method">
 <param name="method" value="pp.neighbors"/>
-<param name="n_neighbors" value="15"/>
+</conditional>
-<param name="knn" value="True"/>
+<section name="advanced_common">
-<param name="random_state" value="0"/>
+<param name="show_log" value="true"/>
-<param name="pp_neighbors_method" value="umap"/>
-<param name="metric" value="euclidean"/>
-</conditional>
-<section name="advanced_common">
-<param name="show_log" value="true" />
 </section>
 <output name="hidden_output">
 <assert_contents>
 <has_text_matching expression="sc.pp.neighbors"/>
 <has_text_matching expression="n_neighbors=15"/>
 <has_text_matching expression="random_state=0"/>
 <has_text_matching expression="method='umap'"/>
 <has_text_matching expression="metric='euclidean'"/>
 </assert_contents>
 </output>
-<output name="anndata_out" file="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size">
+<output name="anndata_out" ftype="h5ad">
 <assert_contents>
-<has_h5_keys keys="X, obs, obsm, uns, var" />
+<has_h5_keys keys="uns/neighbors"/>
-</assert_contents>
+<has_h5_keys keys="obsp/connectivities,obsp/distances"/>
-</output>
+</assert_contents>
-</test>
+</output>
-<test expect_num_outputs="2">
+</test>
-<!-- test 3 -->
-<param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" />
+<!-- test 4 -->
+<test expect_num_outputs="2">
+<param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad"/>
 <conditional name="method">
 <param name="method" value="pp.neighbors"/>
-<param name="n_neighbors" value="15"/>
-<param name="knn" value="True"/>
 <param name="pp_neighbors_method" value="gauss"/>
 <param name="metric" value="braycurtis"/>
 </conditional>
 <section name="advanced_common">
-<param name="show_log" value="true" />
+<param name="show_log" value="true"/>
 </section>
 <output name="hidden_output">
 <assert_contents>
 <has_text_matching expression="sc.pp.neighbors"/>
 <has_text_matching expression="n_neighbors=15"/>
 <has_text_matching expression="random_state=0"/>
 <has_text_matching expression="method='gauss'"/>
 <has_text_matching expression="metric='braycurtis'"/>
 </assert_contents>
 </output>
-<output name="anndata_out" file="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/>
+<output name="anndata_out" ftype="h5ad">
-</test>
+<assert_contents>
-<test expect_num_outputs="2">
+<has_h5_keys keys="obsp/connectivities,obsp/distances"/>
-<!-- test 4 -->
+</assert_contents>
-<param name="adata" value="krumsiek11.h5ad" />
+</output>
+</test>
+<!-- test 5 -->
+<test expect_num_outputs="2">
+<param name="adata" value="krumsiek11.h5ad"/>
 <conditional name="method">
 <param name="method" value="tl.score_genes"/>
 <param name="gene_list" value="Gata2, Fog1"/>
 <param name="ctrl_size" value="2"/>
 <param name="n_bins" value="2"/>
 <param name="random_state" value="2"/>
-<param name="use_raw" value="False"/>
+</conditional>
-<param name="score_name" value="score"/>
+<section name="advanced_common">
-</conditional>
+<param name="show_log" value="true"/>
-<section name="advanced_common">
+</section>
-<param name="show_log" value="true" />
+<output name="hidden_output">
-</section>
+<assert_contents>
-<output name="hidden_output">
+<has_text_matching expression="sc.tl.score_genes"/>
-<assert_contents>
+<has_text_matching expression="gene_list=\['Gata2', 'Fog1'\]"/>
-<has_text_matching expression="sc.tl.score_genes" />
+<has_text_matching expression="ctrl_size=2"/>
-<has_text_matching expression="gene_list=\['Gata2', 'Fog1'\]" />
+<has_text_matching expression="score_name='score'"/>
-<has_text_matching expression="ctrl_size=2" />
+<has_text_matching expression="n_bins=2"/>
-<has_text_matching expression="score_name='score'" />
+<has_text_matching expression="random_state=2"/>
-<has_text_matching expression="n_bins=2" />
+<has_text_matching expression="use_raw=False"/>
-<has_text_matching expression="random_state=2" />
+<has_text_matching expression="copy=False"/>
-<has_text_matching expression="use_raw=False" />
+</assert_contents>
-<has_text_matching expression="copy=False" />
+</output>
-</assert_contents>
+<output name="anndata_out" ftype="h5ad">
-</output>
+<assert_contents>
-<output name="anndata_out" file="tl.score_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+<has_h5_keys keys="obs/score"/>
-</test>
+</assert_contents>
-<test expect_num_outputs="2">
+</output>
-<!-- test 5 -->
+</test>
-<param name="adata" value="krumsiek11.h5ad" />
+<!-- test 6 -->
+<test expect_num_outputs="2">
+<param name="adata" value="krumsiek11.h5ad"/>
 <conditional name="method">
 <param name="method" value="tl.score_genes_cell_cycle"/>
 <conditional name='s_genes'>
 <param name="format" value="text"/>
 <param name="text" value="Gata2, Fog1, EgrNab"/>
 <param name="format" value="text"/>
 <param name="text" value="Gata2, Fog1, EgrNab"/>
 </conditional>
 <param name="n_bins" value="2"/>
 <param name="random_state" value="1"/>
-<param name="use_raw" value="False"/>
+</conditional>
-</conditional>
+<section name="advanced_common">
-<section name="advanced_common">
+<param name="show_log" value="true"/>
-<param name="show_log" value="true" />
 </section>
 <output name="hidden_output">
 <assert_contents>
 <has_text_matching expression="sc.tl.score_genes_cell_cycle"/>
 <has_text_matching expression="s_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/>
 <has_text_matching expression="n_bins=2"/>
 <has_text_matching expression="random_state=1"/>
 <has_text_matching expression="use_raw=False"/>
 </assert_contents>
 </output>
-<output name="anndata_out" file="tl.score_genes_cell_cycle.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+<output name="anndata_out" ftype="h5ad">
-</test>
+<assert_contents>
-<test expect_num_outputs="2">
+<has_h5_keys keys="obs/S_score,obs/G2M_score,obs/phase"/>
-<!-- test 6 -->
+</assert_contents>
-<param name="adata" value="krumsiek11.h5ad" />
+</output>
+</test>
+<!-- test 7 -->
+<test expect_num_outputs="2">
+<param name="adata" value="krumsiek11.h5ad"/>
 <conditional name="method">
 <param name="method" value="tl.rank_genes_groups"/>
 <param name="groupby" value="cell_type"/>
-<param name="use_raw" value="False"/>
-<conditional name="ref">
-<param name="rest" value="rest"/>
-</conditional>
 <param name="n_genes" value="100"/>
 <conditional name="tl_rank_genes_groups_method">
 <param name="method" value="t-test_overestim_var"/>
-<param name="corr_method" value="benjamini-hochberg"/>
 </conditional>
 </conditional>
 <section name="advanced_common">
-<param name="show_log" value="true" />
+<param name="show_log" value="true"/>
 </section>
 <output name="hidden_output">
 <assert_contents>
 <has_text_matching expression="sc.tl.rank_genes_groups"/>
 <has_text_matching expression="groupby='cell_type'"/>
 <has_text_matching expression="n_genes=100"/>
 <has_text_matching expression="method='t-test_overestim_var'"/>
 <has_text_matching expression="corr_method='benjamini-hochberg'"/>
 </assert_contents>
 </output>
-<output name="anndata_out" file="tl.rank_genes_groups.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+<output name="anndata_out" ftype="h5ad">
-</test>
+<assert_contents>
-<test expect_num_outputs="2">
+<has_h5_keys keys="uns/rank_genes_groups"/>
-<!-- test 7 -->
+</assert_contents>
-<param name="adata" value="pbmc68k_reduced.h5ad" />
+</output>
+</test>
+<!-- test 8 -->
+<test expect_num_outputs="2">
+<param name="adata" value="krumsiek11.h5ad"/>
 <conditional name="method">
 <param name="method" value="tl.rank_genes_groups"/>
-<param name="groupby" value="louvain"/>
+<param name="groupby" value="cell_type"/>
-<param name="use_raw" value="True"/>
-<conditional name="ref">
-<param name="rest" value="rest"/>
-</conditional>
 <param name="n_genes" value="100"/>
 <conditional name="tl_rank_genes_groups_method">
 <param name="method" value="logreg"/>
-<conditional name="solver">
-<param name="solver" value="newton-cg"/>
-<param name="fit_intercept" value="True"/>
-<param name="max_iter" value="100"/>
-<param name="multi_class" value="auto"/>
-</conditional>
-<param name="tol" value="1e-4"/>
-<param name="c" value="1.0"/>
 </conditional>
 </conditional>
 <section name="advanced_common">
-<param name="show_log" value="true" />
+<param name="show_log" value="true"/>
 </section>
 <output name="hidden_output">
 <assert_contents>
 <has_text_matching expression="sc.tl.rank_genes_groups"/>
-<has_text_matching expression="groupby='louvain'"/>
+<has_text_matching expression="groupby='cell_type'"/>
-<has_text_matching expression="use_raw=True"/>
+<has_text_matching expression="use_raw=False"/>
 <has_text_matching expression="reference='rest'"/>
 <has_text_matching expression="n_genes=100"/>
 <has_text_matching expression="method='logreg'"/>
-<has_text_matching expression="solver='newton-cg'"/>
+<has_text_matching expression="solver='lbfgs'"/>
 <has_text_matching expression="penalty='l2'"/>
 <has_text_matching expression="fit_intercept=True"/>
 <has_text_matching expression="max_iter=100"/>
 <has_text_matching expression="multi_class='auto'"/>
 <has_text_matching expression="tol=0.0001"/>
 <has_text_matching expression="C=1.0"/>
 </assert_contents>
 </output>
-<output name="anndata_out" file="tl.rank_genes_groups.newton-cg.pbmc68k_reduced.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.15">
+<output name="anndata_out" ftype="h5ad">
 <assert_contents>
-<has_h5_keys keys="X, obs, obsm, raw/X, raw/var, uns, var" />
+<has_h5_keys keys="uns/rank_genes_groups"/>
 </assert_contents>
 </output>
 </test>
-<test expect_num_outputs="2">
-<!-- test 8 -->
+<!-- test 9 -->
-<param name="adata" value="pbmc68k_reduced.h5ad" />
+<test expect_num_outputs="2">
+<param name="adata" value="krumsiek11.h5ad"/>
 <conditional name="method">
 <param name="method" value="tl.rank_genes_groups"/>
-<param name="groupby" value="louvain"/>
+<param name="groupby" value="cell_type"/>
-<param name="use_raw" value="True"/>
-<conditional name="ref">
-<param name="rest" value="rest"/>
-</conditional>
 <param name="n_genes" value="100"/>
 <conditional name="tl_rank_genes_groups_method">
 <param name="method" value="logreg"/>
 <conditional name="solver">
 <param name="solver" value="liblinear"/>
 <conditional name="penalty">
 <param name="penalty" value="l2"/>
-<param name="dual" value="False"/>
-<conditional name="intercept_scaling">
-<param name="fit_intercept" value="True"/>
-<param name="intercept_scaling" value="1.0" />
-</conditional>
 <param name="random_state" value="1"/>
 </conditional>
 </conditional>
-<param name="tol" value="1e-4"/>
-<param name="c" value="1.0"/>
 </conditional>
 </conditional>
 <section name="advanced_common">
-<param name="show_log" value="true" />
+<param name="show_log" value="true"/>
 </section>
 <output name="hidden_output">
 <assert_contents>
 <has_text_matching expression="sc.tl.rank_genes_groups"/>
-<has_text_matching expression="groupby='louvain'"/>
+<has_text_matching expression="groupby='cell_type'"/>
-<has_text_matching expression="use_raw=True"/>
+<has_text_matching expression="use_raw=False"/>
 <has_text_matching expression="reference='rest'"/>
 <has_text_matching expression="n_genes=100"/>
 <has_text_matching expression="method='logreg'"/>
 <has_text_matching expression="solver='liblinear'"/>
 <has_text_matching expression="penalty='l2'"/>
 <has_text_matching expression="intercept_scaling=1.0"/>
 <has_text_matching expression="tol=0.0001"/>
 <has_text_matching expression="C=1.0"/>
 </assert_contents>
 </output>
-<output name="anndata_out" file="tl.rank_genes_groups.liblinear.krumsiek11.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.15">
+<output name="anndata_out" ftype="h5ad">
 <assert_contents>
-<has_h5_keys keys="X, obs, obsm, raw/X, raw/var, uns, var" />
+<has_h5_keys keys="uns/rank_genes_groups"/>
 </assert_contents>
 </output>
 </test>
-<!-- test expect_num_outputs="2">
-< test 9  tl.marker_gene_overlap function was commented because inpace=True does not work>
+<!-- test 10 -->
-<param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced.h5ad" />
+<test expect_num_outputs="2">
+<param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced.h5ad"/>
 <conditional name="method">
 <param name="method" value="tl.marker_gene_overlap"/>
 <repeat name="reference_markers">
 <param name="key" value="CD4 T cells"/>
-<param name="value" value="IL7R"/>
+<param name="values" value="IL7R"/>
 </repeat>
 <repeat name="reference_markers">
 <param name="key" value="CD14+ Monocytes"/>
-<param name="value" value="CD14,LYZ"/>
+<param name="values" value="CD14,LYZ"/>
 </repeat>
 <repeat name="reference_markers">
 <param name="key" value="B cells"/>
-<param name="value" value="MS4A1"/>
+<param name="values" value="MS4A1"/>
 </repeat>
-<conditional name="overlap">
+</conditional>
-<param name="method" value="overlap_count"/>
+<section name="advanced_common">
-<param name="normalize" value="None"/>
+<param name="show_log" value="true"/>
-</conditional>
+</section>
-</conditional>
 <assert_stdout>
-<has_text_matching expression="tl.marker_gene_overlap"/>
+<has_text_matching expression="marker_gene_overlap"/>
-<has_text_matching expression="key='rank_genes_groups'"/>
-<has_text_matching expression="method='overlap_count'"/>
 </assert_stdout>
-<output name="anndata_out" file="tl.marker_gene_overlap.pbmc68k_reduced.h5ad" ftype="h5ad" compare="sim_size"/>
+<output name="hidden_output">
-</test> -->
+<assert_contents>
-<test expect_num_outputs="2">
+<has_text_matching expression="sc.tl.marker_gene_overlap"/>
-<!-- test 10 -->
+</assert_contents>
-<param name="adata" value="krumsiek11.h5ad" />
+</output>
+<output name="anndata_out" ftype="h5ad">
+<assert_contents>
+<has_h5_keys keys="uns/rank_genes_groups"/>
+</assert_contents>
+</output>
+</test>
+<!-- test 11 -->
+<test expect_num_outputs="2">
+<param name="adata" value="krumsiek11.h5ad"/>
 <conditional name="method">
 <param name="method" value="pp.log1p"/>
 </conditional>
 <section name="advanced_common">
-<param name="show_log" value="true" />
+<param name="show_log" value="true"/>
 </section>
 <output name="hidden_output">
 <assert_contents>
 <has_text_matching expression="sc.pp.log1p"/>
 </assert_contents>
 </output>
-<output name="anndata_out" file="pp.log1p.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+<output name="anndata_out" ftype="h5ad">
-</test>
+<assert_contents>
-<test expect_num_outputs="2">
+<has_h5_keys keys="uns/log1p"/>
-<!-- test 11 -->
+</assert_contents>
-<param name="adata" value="krumsiek11.h5ad" />
+</output>
+</test>
+<!-- test 12 -->
+<test expect_num_outputs="2">
+<param name="adata" value="krumsiek11.h5ad"/>
 <conditional name="method">
 <param name="method" value="pp.scale"/>
-<param name="zero_center" value="true"/>
+</conditional>
-</conditional>
+<section name="advanced_common">
-<section name="advanced_common">
+<param name="show_log" value="true"/>
-<param name="show_log" value="true" />
 </section>
 <output name="hidden_output">
 <assert_contents>
 <has_text_matching expression="sc.pp.scale"/>
 <has_text_matching expression="zero_center=True"/>
 </assert_contents>
 </output>
-<output name="anndata_out" file="pp.scale.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+<output name="anndata_out" ftype="h5ad">
-</test>
+<assert_contents>
-<test expect_num_outputs="2">
+<has_h5_keys keys="var/mean,var/std"/>
-<!-- test 12 -->
+</assert_contents>
-<param name="adata" value="krumsiek11.h5ad" />
+</output>        </test>
+<!-- test 13 -->
+<test expect_num_outputs="2">
+<param name="adata" value="krumsiek11.h5ad"/>
 <conditional name="method">
 <param name="method" value="pp.scale"/>
-<param name="zero_center" value="true"/>
 <param name="max_value" value="10"/>
 </conditional>
 <section name="advanced_common">
-<param name="show_log" value="true" />
+<param name="show_log" value="true"/>
 </section>
 <output name="hidden_output">
 <assert_contents>
 <has_text_matching expression="sc.pp.scale"/>
 <has_text_matching expression="zero_center=True"/>
 <has_text_matching expression="max_value=10.0"/>
 </assert_contents>
 </output>
-<output name="anndata_out" file="pp.scale_max_value.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+<output name="anndata_out" ftype="h5ad">
-</test>
+<assert_contents>
-<test expect_num_outputs="2">
+<has_h5_keys keys="var/mean,var/std"/>
-<!-- test 13 -->
+</assert_contents>
-<param name="adata" value="krumsiek11.h5ad" />
+</output>
+</test>
+<!-- test 14 -->
+<test expect_num_outputs="2">
+<param name="adata" value="random-randint.h5ad"/>
 <conditional name="method">
 <param name="method" value="pp.sqrt"/>
 </conditional>
 <section name="advanced_common">
-<param name="show_log" value="true" />
+<param name="show_log" value="true"/>
 </section>
 <output name="hidden_output">
 <assert_contents>
 <has_text_matching expression="sc.pp.sqrt"/>
-</assert_contents>
+<has_text_matching expression="stats before sqrt: min= 0.0 max= 999.0 mean= 499.83777"/>
-</output>
+<has_text_matching expression="stats after sqrt: min= 0.0 max= 31.606962 mean= 21.079018"/>
-<output name="anndata_out" file="pp.sqrt.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+</assert_contents>
-</test>
+</output>
-<test expect_num_outputs="2">
+<output name="anndata_out" ftype="h5ad">
-<!-- test 13 -->
+<assert_contents>
-<param name="adata" value="sparce_csr_matrix.h5ad" />
+<has_h5_keys keys="obs/index"/>
-<conditional name="method">
+</assert_contents>
-<param name="method" value="pp.calculate_qc_metrics"/>
+</output>
-<param name="expr_type" value="counts"/>
-<param name="var_type" value="genes"/>
-<param name="qc_vars" value="mito,negative"/>
-<param name="percent_top" value="50,100,200,300"/>
-</conditional>
-<section name="advanced_common">
-<param name="show_log" value="true" />
-</section>
-<output name="hidden_output">
-<assert_contents>
-<has_text_matching expression="sc.pp.calculate_qc_metrics" />
-<has_text_matching expression="expr_type='counts'" />
-<has_text_matching expression="var_type='genes'" />
-<has_text_matching expression="qc_vars=\['mito', 'negative'\]" />
-<has_text_matching expression="percent_top=\[50, 100, 200, 300\]" />
-</assert_contents>
-</output>
-<output name="anndata_out" file="pp.calculate_qc_metrics.sparce_csr_matrix.h5ad" ftype="h5ad" compare="sim_size"/>
 </test>
 </tests>
 <help><![CDATA[
 Calculate quality control metrics, using `pp.calculate_qc_metrics`
 ===================================================================
 Calculates a number of qc metrics for an AnnData object, largely based on calculateQCMetrics from scater.
 It is currently most efficient on a sparse CSR or dense matrix.
 It updates the observation level metrics with
 - total_{var_type}_by_{expr_type} (e.g. "total_genes_by_counts", number of genes with positive counts in a cell)
 Calculate an overlap score between data-derived marker genes and provided markers (`tl.marker_gene_overlap`)
 ============================================================================================================
 Marker gene overlap scores can be quoted as overlap counts, overlap coefficients, or jaccard indices. The method returns a pandas dataframe which can be used to annotate clusters based on marker gene overlaps.
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.marker_gene_overlap.html>`__
 Logarithmize the data matrix (`pp.log1p`)
 =========================================
 More details on the `scanpy documentation

Mercurial > repos > iuc > scanpy_inspect

comparison inspect.xml @ 14:1c36180febfb draft