Mercurial > repos > iuc > scanpy_inspect
comparison inspect.xml @ 1:dd565b6027ab draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 8ef5f7c6f8728608a3f05bb51e11b642b84a05f5"
| author | iuc |
|---|---|
| date | Wed, 16 Oct 2019 06:25:03 -0400 |
| parents | 6eaa7094afd5 |
| children | 5713d7fe1304 |
comparison
equal
deleted
inserted
replaced
| 0:6eaa7094afd5 | 1:dd565b6027ab |
|---|---|
| 1 <tool id="scanpy_inspect" name="Inspect with scanpy" version="@galaxy_version@"> | 1 <tool id="scanpy_inspect" name="Inspect and manipulate" version="@galaxy_version@"> |
| 2 <description></description> | 2 <description> with scanpy</description> |
| 3 <macros> | 3 <macros> |
| 4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
| 5 <xml name="score_genes_params"> | |
| 6 <param argument="n_bins" type="integer" value="25" label="Number of expression level bins for sampling" help=""/> | |
| 7 <param argument="random_state" type="integer" value="0" label="Random seed for sampling" help=""/> | |
| 8 <expand macro="param_use_raw"/> | |
| 9 </xml> | |
| 10 <token name="@CMD_score_genes_inputs@"><![CDATA[ | |
| 11 n_bins=$method.n_bins, | |
| 12 random_state=$method.random_state, | |
| 13 use_raw=$method.use_raw, | |
| 14 copy=False | |
| 15 ]]></token> | |
| 16 <xml name="corr_method"> | |
| 17 <param argument="corr_method" type="select" label="P-value correction method"> | |
| 18 <option value="benjamini-hochberg">Benjamini-Hochberg</option> | |
| 19 <option value="bonferroni">Bonferroni</option> | |
| 20 </param> | |
| 21 </xml> | |
| 22 <xml name="fit_intercept"> | |
| 23 <param argument="fit_intercept" type="boolean" truevalue="True" falsevalue="False" checked="true" | |
| 24 label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help=""/> | |
| 25 </xml> | |
| 26 <xml name="max_iter"> | |
| 27 <param argument="max_iter" type="integer" min="0" value="100" label="Maximum number of iterations taken for the solvers to converge" help=""/> | |
| 28 </xml> | |
| 29 <xml name="multi_class"> | |
| 30 <param argument="multi_class" type="select" label="Multi class" help=""> | |
| 31 <option value="ovr">ovr: a binary problem is fit for each label</option> | |
| 32 <option value="multinomial">multinomial: the multinomial loss fit across the entire probability distribution, even when the data is binary</option> | |
| 33 <option value="auto">auto: selects ‘ovr’ if the data is binary and otherwise selects ‘multinomial’</option> | |
| 34 </param> | |
| 35 </xml> | |
| 36 <xml name="penalty"> | |
| 37 <param argument="penalty" type="select" label="Norm used in the penalization" help=""> | |
| 38 <option value="l1">l1</option> | |
| 39 <option value="l2">l2</option> | |
| 40 <option value="customized">customized</option> | |
| 41 </param> | |
| 42 </xml> | |
| 43 <xml name="custom_penalty"> | |
| 44 <param argument="pen" type="text" value="" label="Norm used in the penalization" help=""/> | |
| 45 </xml> | |
| 46 <xml name="random_state"> | |
| 47 <param argument="random_state" type="integer" value="" optional="true" | |
| 48 label="The seed of the pseudo random number generator to use when shuffling the data" help=""/> | |
| 49 </xml> | |
| 5 </macros> | 50 </macros> |
| 6 <expand macro="requirements"/> | 51 <expand macro="requirements"/> |
| 7 <expand macro="version_command"/> | 52 <expand macro="version_command"/> |
| 8 <command detect_errors="exit_code"><![CDATA[ | 53 <command detect_errors="exit_code"><![CDATA[ |
| 9 @CMD@ | 54 @CMD@ |
| 11 <configfiles> | 56 <configfiles> |
| 12 <configfile name="script_file"><![CDATA[ | 57 <configfile name="script_file"><![CDATA[ |
| 13 @CMD_imports@ | 58 @CMD_imports@ |
| 14 @CMD_read_inputs@ | 59 @CMD_read_inputs@ |
| 15 | 60 |
| 16 #if $method.method == "tl.paga" | 61 #if $method.method == "pp.calculate_qc_metrics" |
| 17 sc.tl.paga( | 62 sc.pp.calculate_qc_metrics( |
| 18 adata=adata, | 63 adata=adata, |
| 19 groups='$method.groups', | 64 expr_type='$method.expr_type', |
| 20 use_rna_velocity =$method.use_rna_velocity, | 65 var_type='$method.var_type', |
| 21 model='$method.model', | 66 #if str($method.qc_vars) != '' |
| 67 #set $qc_vars = [str(x.strip()) for x in str($method.qc_vars).split(',')] | |
| 68 qc_vars=$qc_vars, | |
| 69 #end if | |
| 70 #if str($method.percent_top) != '' | |
| 71 #set $percent_top = [int(x.strip()) for x in str($method.percent_top).split(',')] | |
| 72 percent_top=$method.percent_top, | |
| 73 #end if | |
| 74 inplace=True) | |
| 75 | |
| 76 #else if $method.method == "tl.score_genes" | |
| 77 sc.tl.score_genes( | |
| 78 adata=adata, | |
| 79 #set $gene_list = [str(x.strip()) for x in str($method.gene_list).split(',')] | |
| 80 gene_list=$gene_list, | |
| 81 ctrl_size=$method.ctrl_size, | |
| 82 score_name='$method.score_name', | |
| 83 #if $method.gene_pool | |
| 84 #set $gene_pool = [str(x.strip()) for x in $method.gene_pool.split(',')] | |
| 85 gene_pool=$gene_pool, | |
| 86 #end if | |
| 87 @CMD_score_genes_inputs@) | |
| 88 | |
| 89 #else if $method.method == "tl.score_genes_cell_cycle" | |
| 90 #if str($method.s_genes.format) == 'file' | |
| 91 with open('$method.s_genes.file', 'r') as s_genes_f: | |
| 92 s_genes = [str(x.strip()) for x in s_genes_f.readlines()] | |
| 93 print(s_genes) | |
| 94 #end if | |
| 95 | |
| 96 #if str($method.g2m_genes.format) == 'file' | |
| 97 with open('$method.g2m_genes.file', 'r') as g2m_genes_f: | |
| 98 g2m_genes = [str(x.strip()) for x in g2m_genes_f.readlines()] | |
| 99 print(g2m_genes) | |
| 100 #end if | |
| 101 | |
| 102 sc.tl.score_genes_cell_cycle( | |
| 103 adata=adata, | |
| 104 #if str($method.s_genes.format) == 'text' | |
| 105 #set $s_genes = [str(x.strip()) for x in $method.s_genes.text.split(',')] | |
| 106 s_genes=$s_genes, | |
| 107 #else if str($method.s_genes.format) == 'file' | |
| 108 s_genes=s_genes, | |
| 109 #end if | |
| 110 #if str($method.g2m_genes.format) == 'text' | |
| 111 #set $g2m_genes = [str(x.strip()) for x in $method.g2m_genes.text.split(',')] | |
| 112 g2m_genes=$g2m_genes, | |
| 113 #else if str($method.g2m_genes.format) == 'file' | |
| 114 g2m_genes=g2m_genes, | |
| 115 #end if | |
| 116 @CMD_score_genes_inputs@) | |
| 117 | |
| 118 #else if $method.method == 'pp.neighbors' | |
| 119 sc.pp.neighbors( | |
| 120 adata=adata, | |
| 121 n_neighbors=$method.n_neighbors, | |
| 122 #if str($method.n_pcs) != '' | |
| 123 n_pcs=$method.n_pcs, | |
| 124 #end if | |
| 125 #if str($method.use_rep) != '' | |
| 126 use_rep='$method.use_rep', | |
| 127 #end if | |
| 128 knn=$method.knn, | |
| 129 random_state=$method.random_state, | |
| 130 method='$method.pp_neighbors_method', | |
| 131 metric='$method.metric', | |
| 22 copy=False) | 132 copy=False) |
| 23 #elif $method.method == "tl.dpt" | 133 |
| 24 sc.tl.dpt( | 134 #else if $method.method == 'tl.rank_genes_groups' |
| 135 sc.tl.rank_genes_groups( | |
| 25 adata=adata, | 136 adata=adata, |
| 26 n_dcs=$method.n_dcs, | 137 groupby='$method.groupby', |
| 27 n_branchings=$method.n_branchings, | 138 use_raw=$method.use_raw, |
| 28 min_group_size=$method.min_group_size, | 139 #if str($method.groups) != '' |
| 29 allow_kendall_tau_shift=$method.allow_kendall_tau_shift, | 140 #set $group=[x.strip() for x in str($method.groups).split(',')] |
| 141 groups=$group, | |
| 142 #end if | |
| 143 #if $method.ref.rest == 'rest' | |
| 144 reference='$method.ref.rest', | |
| 145 #else | |
| 146 reference='$method.ref.reference', | |
| 147 #end if | |
| 148 n_genes=$method.n_genes, | |
| 149 method='$method.tl_rank_genes_groups_method.method', | |
| 150 #if $method.tl_rank_genes_groups_method.method != 'logreg' | |
| 151 corr_method='$method.tl_rank_genes_groups_method.corr_method', | |
| 152 #else | |
| 153 solver='$method.tl_rank_genes_groups_method.solver.solver', | |
| 154 #if $method.tl_rank_genes_groups_method.solver.solver == 'newton-cg' | |
| 155 penalty='l2', | |
| 156 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept, | |
| 157 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter, | |
| 158 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class', | |
| 159 #else if $method.tl_rank_genes_groups_method.solver.solver == 'lbfgs' | |
| 160 penalty='l2', | |
| 161 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept, | |
| 162 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter, | |
| 163 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class', | |
| 164 #else if $method.tl_rank_genes_groups_method.solver.solver == 'liblinear' | |
| 165 #if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1' | |
| 166 penalty='l1', | |
| 167 #else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2' | |
| 168 penalty='l2', | |
| 169 dual=$method.tl_rank_genes_groups_method.solver.penalty.dual, | |
| 170 #else | |
| 171 penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen', | |
| 172 #end if | |
| 173 fit_intercept=$method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept, | |
| 174 #if $method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept == 'True' | |
| 175 intercept_scaling=$method.tl_rank_genes_groups_method.solver.intercept_scaling.intercept_scaling, | |
| 176 #end if | |
| 177 #if $method.tl_rank_genes_groups_method.solver.random_state | |
| 178 random_state=$method.tl_rank_genes_groups_method.solver.random_state, | |
| 179 #end if | |
| 180 #else if $method.tl_rank_genes_groups_method.solver.solver == 'sag' | |
| 181 penalty='l2', | |
| 182 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept, | |
| 183 #if $method.tl_rank_genes_groups_method.solver.random_state | |
| 184 random_state=$method.tl_rank_genes_groups_method.solver.random_state, | |
| 185 #end if | |
| 186 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter, | |
| 187 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class', | |
| 188 #else if $method.tl_rank_genes_groups_method.solver.solver == 'saga' | |
| 189 #if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1' | |
| 190 penalty='l1', | |
| 191 #else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2' | |
| 192 penalty='l2', | |
| 193 #else | |
| 194 penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen', | |
| 195 #end if | |
| 196 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept, | |
| 197 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class', | |
| 198 #end if | |
| 199 tol=$method.tl_rank_genes_groups_method.tol, | |
| 200 C=$method.tl_rank_genes_groups_method.c, | |
| 201 #end if | |
| 202 only_positive=$method.only_positive) | |
| 203 | |
| 204 #else if $method.method == "tl.marker_gene_overlap" | |
| 205 reference_markers = {} | |
| 206 #for $i, $s in enumerate($method.reference_markers) | |
| 207 #set $list=[x.strip() for x in str($s.values).split(',')] | |
| 208 reference_markers['$s.key'] = $list | |
| 209 #end for | |
| 210 | |
| 211 sc.tl.marker_gene_overlap( | |
| 212 adata, | |
| 213 reference_markers, | |
| 214 #if str($method.key) != '' | |
| 215 key='$method.key', | |
| 216 #end if | |
| 217 method='$method.overlap.method', | |
| 218 #if $method.overlap.method == 'overlap_count' and str($method.overlap.normalize) != 'None' | |
| 219 normalize='$method.overlap.normalize', | |
| 220 #end if | |
| 221 #if str($method.top_n_markers) != '' | |
| 222 top_n_markers=$method.top_n_markers, | |
| 223 #end if | |
| 224 #if str($method.adj_pval_threshold) != '' | |
| 225 adj_pval_threshold=$method.adj_pval_threshold, | |
| 226 #end if | |
| 227 #if str($method.key_added) != '' | |
| 228 key_added='$method.key_added', | |
| 229 #end if | |
| 230 inplace=True) | |
| 231 | |
| 232 #else if $method.method == "pp.log1p" | |
| 233 sc.pp.log1p( | |
| 234 data=adata, | |
| 30 copy=False) | 235 copy=False) |
| 31 adata.obs.to_csv('$obs', sep='\t') | 236 |
| 237 #else if $method.method == "pp.scale" | |
| 238 sc.pp.scale( | |
| 239 data=adata, | |
| 240 zero_center=$method.zero_center, | |
| 241 #if $method.max_value | |
| 242 max_value=$method.max_value, | |
| 243 #end if | |
| 244 copy=False) | |
| 245 | |
| 246 #else if $method.method == "pp.sqrt" | |
| 247 sc.pp.sqrt( | |
| 248 data=adata, | |
| 249 copy=False) | |
| 32 #end if | 250 #end if |
| 33 | 251 |
| 34 @CMD_anndata_write_outputs@ | 252 @CMD_anndata_write_outputs@ |
| 35 ]]></configfile> | 253 ]]></configfile> |
| 36 </configfiles> | 254 </configfiles> |
| 37 <inputs> | 255 <inputs> |
| 38 <expand macro="inputs_anndata"/> | 256 <expand macro="inputs_anndata"/> |
| 39 <conditional name="method"> | 257 <conditional name="method"> |
| 40 <param argument="method" type="select" label="Method used for plotting"> | 258 <param argument="method" type="select" label="Method used for inspecting"> |
| 41 <!--<option value="tl.paga_compare_paths">, using `tl.paga_compare_paths`</option>!--> | 259 <option value="pp.calculate_qc_metrics">Calculate quality control metrics, using `pp.calculate_qc_metrics`</option> |
| 42 <!--<option value="tl.paga_degrees">, using `tl.paga_degrees`</option>!--> | 260 <option value="pp.neighbors">Compute a neighborhood graph of observations, using `pp.neighbors`</option> |
| 43 <!--<option value="tl.paga_expression_entropies">, using `tl.paga_expression_entropies`</option>!--> | 261 <option value="tl.score_genes">Score a set of genes, using `tl.score_genes`</option> |
| 44 <option value="tl.paga">Generate cellular maps of differentiation manifolds with complex topologies, using `tl.paga`</option> | 262 <option value="tl.score_genes_cell_cycle">Score cell cycle genes, using `tl.score_genes_cell_cycle`</option> |
| 45 <option value="tl.dpt">Infer progression of cells through geodesic distance along the graph, using `tl.dpt`</option> | 263 <option value="tl.rank_genes_groups">Rank genes for characterizing groups, using `tl.rank_genes_groups`</option> |
| 264 <!--<option value="tl.marker_gene_overlap">Calculate an overlap score between data-derived marker genes and provided markers, using `tl.marker_gene_overlap`</option>--> | |
| 265 <option value="pp.log1p">Logarithmize the data matrix, using `pp.log1p`</option> | |
| 266 <option value="pp.scale">Scale data to unit variance and zero mean, using `pp.scale`</option> | |
| 267 <option value="pp.sqrt">Square root the data matrix, using `pp.sqrt`</option> | |
| 46 </param> | 268 </param> |
| 47 <when value="tl.paga"> | 269 <when value="pp.calculate_qc_metrics"> |
| 48 <param argument="groups" type="text" value="louvain" label="Key for categorical in the input" help="You can pass your predefined groups by choosing any categorical annotation of observations (`adata.obs`)."/> | 270 <param argument="expr_type" type="text" value="counts" label="Name of kind of values in X"/> |
| 49 <param argument="use_rna_velocity" type="boolean" truevalue="False" falsevalue="False" checked="false" label="Use RNA velocity to orient edges in the abstracted graph and estimate transitions?" help="Requires that `adata.uns` contains a directed single-cell graph with key `['velocyto_transitions']`. This feature might be subject to change in the future."/> | 271 <param argument="var_type" type="text" value="genes" label="The kind of thing the variables are"/> |
| 50 <param argument="model" type="select" label="PAGA connectivity model" help=""> | 272 <param argument="qc_vars" type="text" value="" label="Keys for boolean columns of `.var` which identify variables you could want to control for" |
| 51 <option value="v1.2">v1.2</option> | 273 help="Keys separated by a comma"/> |
| 52 <option value="v1.0">v1.0</option> | 274 <param argument="percent_top" type="text" value="" label="Proportions of top genes to cover" |
| 275 help="Values (integers) are considered 1-indexed: `50` finds the cumulative proportion to the 50th most expressed gene. Values separated by a comma. | |
| 276 If empty, these proportions are not calculated."/> | |
| 277 </when> | |
| 278 <when value="pp.neighbors"> | |
| 279 <param argument="n_neighbors" type="integer" min="0" value="15" label="The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation" help="Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. If `knn` is `True`, number of nearest neighbors to be searched. If `knn` is `False`, a Gaussian kernel width is set to the distance of the `n_neighbors` neighbor."/> | |
| 280 <param argument="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use" help=""/> | |
| 281 <param argument="use_rep" type="text" value="" optional="true" label="Indicated representation to use" help="If not set, the representation is chosen automatically: for n_vars below 50, X is used, otherwise X_pca (uns) is used. If X_pca is not present, it is computed with default parameters"/> | |
| 282 <param argument="knn" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use a hard threshold to restrict the number of neighbors to n_neighbors?" help="If true, it considers a knn graph. Otherwise, it uses a Gaussian Kernel to assign low weights to neighbors more distant than the `n_neighbors` nearest neighbor."/> | |
| 283 <param argument="random_state" type="integer" value="0" label="Numpy random seed" help=""/> | |
| 284 <param name="pp_neighbors_method" argument="method" type="select" label="Method for computing connectivities" help=""> | |
| 285 <option value="umap">umap (McInnes et al, 2018)</option> | |
| 286 <option value="gauss">gauss: Gauss kernel following (Coifman et al 2005) with adaptive width (Haghverdi et al 2016)</option> | |
| 287 </param> | |
| 288 <param argument="metric" type="select" label="Distance metric" help=""> | |
| 289 <expand macro="distance_metric_options"/> | |
| 53 </param> | 290 </param> |
| 54 </when> | 291 </when> |
| 55 <when value="tl.dpt"> | 292 <when value="tl.score_genes"> |
| 56 <param argument="n_dcs" type="integer" min="0" value="10" label="Number of diffusion components to use" help=""/> | 293 <param argument="gene_list" type="text" value="" label="The list of gene names used for score calculation" help="Genes separated by a comma"/> |
| 57 <param argument="n_branchings" type="integer" min="0" value="0" label="Number of branchings to detect" help=""/> | 294 <param argument="ctrl_size" type="integer" value="50" label="Number of reference genes to be sampled" |
| 58 <param argument="min_group_size" type="float" min="0" value="0.01" label="Min group size" help="During recursive splitting of branches ('dpt groups') for `n_branchings` > 1, do not consider groups that contain less than `min_group_size` data points. If a float, `min_group_size` refers to a fraction of the total number of data points."/> | 295 help="If `len(gene_list)` is not too low, you can set `ctrl_size=len(gene_list)`."/> |
| 59 <param argument="allow_kendall_tau_shift" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Allow Kendall tau shift?" help="If a very small branch is detected upon splitting, shift away from maximum correlation in Kendall tau criterion of Haghverdi et al (2016) to stabilize the splitting."/> | 296 <param argument="gene_pool" type="text" value="" optional="true" label="Genes for sampling the reference set" |
| 297 help="Default is all genes. Genes separated by a comma"/> | |
| 298 <expand macro="score_genes_params"/> | |
| 299 <param argument="score_name" type="text" value="score" label="Name of the field to be added in `.obs`" help=""/> | |
| 60 </when> | 300 </when> |
| 301 <when value="tl.score_genes_cell_cycle"> | |
| 302 <conditional name='s_genes'> | |
| 303 <param name="format" type="select" label="Format for the list of genes associated with S phase"> | |
| 304 <option value="file">File</option> | |
| 305 <option value="text" selected="true">Text</option> | |
| 306 </param> | |
| 307 <when value="text"> | |
| 308 <param name="text" type="text" value="" label="List of genes associated with S phase" help="Genes separated by a comma"/> | |
| 309 </when> | |
| 310 <when value="file"> | |
| 311 <param name="file" type="data" format="txt" label="File with the list of genes associated with S phase" help="One gene per line"/> | |
| 312 </when> | |
| 313 </conditional> | |
| 314 <conditional name='g2m_genes'> | |
| 315 <param name="format" type="select" label="Format for the list of genes associated with G2M phase"> | |
| 316 <option value="file">File</option> | |
| 317 <option value="text" selected="true">Text</option> | |
| 318 </param> | |
| 319 <when value="text"> | |
| 320 <param name="text" type="text" value="" label="List of genes associated with G2M phase" help="Genes separated by a comma"/> | |
| 321 </when> | |
| 322 <when value="file"> | |
| 323 <param name="file" type="data" format="txt" label="File with the list of genes associated with G2M phase" help="One gene per line"/> | |
| 324 </when> | |
| 325 </conditional> | |
| 326 <expand macro="score_genes_params"/> | |
| 327 </when> | |
| 328 <when value="tl.rank_genes_groups"> | |
| 329 <param argument="groupby" type="text" value="" label="The key of the observations grouping to consider" help=""/> | |
| 330 <expand macro="param_use_raw"/> | |
| 331 <param argument="groups" type="text" value="" label="Subset of groups to which comparison shall be restricted" help="Group names separated by a comma, e.g. g1,g2,g3. If not passed, a ranking will be generated for all groups."/> | |
| 332 <conditional name="ref"> | |
| 333 <param name="rest" type="select" label="Comparison"> | |
| 334 <option value="rest">Compare each group to the union of the rest of the group</option> | |
| 335 <option value="group_id">Compare with respect to a specific group</option> | |
| 336 </param> | |
| 337 <when value="rest"/> | |
| 338 <when value="group_id"> | |
| 339 <param argument="reference" type="text" value="" label="Group identifier with respect to which to compare"/> | |
| 340 </when> | |
| 341 </conditional> | |
| 342 <param argument="n_genes" type="integer" min="0" value="100" label="The number of genes that appear in the returned tables" help=""/> | |
| 343 <conditional name="tl_rank_genes_groups_method"> | |
| 344 <param argument="method" type="select" label="Method"> | |
| 345 <option value="t-test">t-test</option> | |
| 346 <option value="wilcoxon">Wilcoxon-Rank-Sum</option> | |
| 347 <option value="t-test_overestim_var" selected="true">t-test with overestimate of variance of each group</option> | |
| 348 <option value="logreg">Logistic regression</option> | |
| 349 </param> | |
| 350 <when value="t-test"> | |
| 351 <expand macro="corr_method"/> | |
| 352 </when> | |
| 353 <when value="wilcoxon"> | |
| 354 <expand macro="corr_method"/> | |
| 355 </when> | |
| 356 <when value="t-test_overestim_var"> | |
| 357 <expand macro="corr_method"/> | |
| 358 </when> | |
| 359 <when value="logreg"> | |
| 360 <conditional name="solver"> | |
| 361 <param argument="solver" type="select" label="Algorithm to use in the optimization problem" help="For small datasets, ‘liblinear’ is a good choice, whereas ‘sag’ and ‘saga’ are faster for large ones. For multiclass problems, only ‘newton-cg’, ‘sag’, ‘saga’ and ‘lbfgs’ handle multinomial loss; ‘liblinear’ is limited to one-versus-rest schemes. ‘newton-cg’, ‘lbfgs’ and ‘sag’ only handle L2 penalty, whereas ‘liblinear’ and ‘saga’ handle L1 penalty."> | |
| 362 <option value="newton-cg">newton-cg</option> | |
| 363 <option value="lbfgs">lbfgs</option> | |
| 364 <option value="liblinear">liblinear</option> | |
| 365 <option value="sag">sag</option> | |
| 366 <option value="saga">saga</option> | |
| 367 </param> | |
| 368 <when value="newton-cg"> | |
| 369 <expand macro="fit_intercept"/> | |
| 370 <expand macro="max_iter"/> | |
| 371 <expand macro="multi_class"/> | |
| 372 </when> | |
| 373 <when value="lbfgs"> | |
| 374 <expand macro="fit_intercept"/> | |
| 375 <expand macro="max_iter"/> | |
| 376 <expand macro="multi_class"/> | |
| 377 </when> | |
| 378 <when value="liblinear"> | |
| 379 <conditional name="penalty"> | |
| 380 <expand macro="penalty"/> | |
| 381 <when value="l1"/> | |
| 382 <when value="l2"> | |
| 383 <param argument="dual" type="boolean" truevalue="True" falsevalue="False" checked="false" | |
| 384 label="Dual (not primal) formulation?" help="Prefer primal when n_samples > n_features"/> | |
| 385 </when> | |
| 386 <when value="customized"> | |
| 387 <expand macro="custom_penalty"/> | |
| 388 </when> | |
| 389 </conditional> | |
| 390 <conditional name="intercept_scaling"> | |
| 391 <param argument="fit_intercept" type="select" | |
| 392 label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help=""> | |
| 393 <option value="True">Yes</option> | |
| 394 <option value="False">No</option> | |
| 395 </param> | |
| 396 <when value="True"> | |
| 397 <param argument="intercept_scaling" type="float" value="1.0" | |
| 398 label="Intercept scaling" | |
| 399 help="x becomes [x, self.intercept_scaling], i.e. a 'synthetic' feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic_feature_weight."/> | |
| 400 </when> | |
| 401 <when value="False"/> | |
| 402 </conditional> | |
| 403 <expand macro="random_state"/> | |
| 404 </when> | |
| 405 <when value="sag"> | |
| 406 <expand macro="fit_intercept"/> | |
| 407 <expand macro="random_state"/> | |
| 408 <expand macro="max_iter"/> | |
| 409 <expand macro="multi_class"/> | |
| 410 </when> | |
| 411 <when value="saga"> | |
| 412 <conditional name="penalty"> | |
| 413 <expand macro="penalty"/> | |
| 414 <when value="l1"/> | |
| 415 <when value="l2"/> | |
| 416 <when value="customized"> | |
| 417 <expand macro="custom_penalty"/> | |
| 418 </when> | |
| 419 </conditional> | |
| 420 <expand macro="fit_intercept"/> | |
| 421 <expand macro="multi_class"/> | |
| 422 </when> | |
| 423 </conditional> | |
| 424 <param argument="tol" type="float" value="1e-4" label="Tolerance for stopping criteria" help=""/> | |
| 425 <param argument="c" type="float" value="1.0" label="Inverse of regularization strength" | |
| 426 help="It must be a positive float. Like in support vector machines, smaller values specify stronger regularization."/> | |
| 427 </when> | |
| 428 </conditional> | |
| 429 <param argument="only_positive" type="boolean" truevalue="True" falsevalue="False" checked="true" | |
| 430 label="Only consider positive differences?" help=""/> | |
| 431 </when> | |
| 432 <!--<when value="tl.marker_gene_overlap"> | |
| 433 <repeat name="reference_markers" title="Marker genes"> | |
| 434 <param name="key" type="text" value="" label="Cell identity name" help=""/> | |
| 435 <param name="values" type="text" value="" label="List of genes" help="Comma-separated names from `var`"/> | |
| 436 </repeat> | |
| 437 <param argument="key" type="text" value="rank_genes_groups" label="Key in adata.uns where the rank_genes_groups output is stored"/> | |
| 438 <conditional name="overlap"> | |
| 439 <param argument="method" type="select" label="Method to calculate marker gene overlap"> | |
| 440 <option value="overlap_count">overlap_count: Intersection of the gene set</option> | |
| 441 <option value="overlap_coef">overlap_coef: Overlap coefficient</option> | |
| 442 <option value="jaccard">jaccard: Jaccard index</option> | |
| 443 </param> | |
| 444 <when value="overlap_count"> | |
| 445 <param argument="normalize" type="select" label="Normalization option for the marker gene overlap output"> | |
| 446 <option value="None">None</option> | |
| 447 <option value="reference">reference: Normalization of the data by the total number of marker genes given in the reference annotation per group</option> | |
| 448 <option value="data">data: Normalization of the data by the total number of marker genes used for each cluster</option> | |
| 449 </param> | |
| 450 </when> | |
| 451 <when value="overlap_coef"/> | |
| 452 <when value="jaccard"/> | |
| 453 </conditional> | |
| 454 <param argument="top_n_markers" type="integer" optional="true" label="Number of top data-derived marker genes to use" help="By default all calculated marker genes are used. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/> | |
| 455 <param argument="adj_pval_threshold" type="float" optional="true" label="Significance threshold on the adjusted p-values to select marker genes" help=" This can only be used when adjusted p-values are calculated by 'tl.rank_genes_groups'. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/> | |
| 456 <param argument="key_added" type="text" value="" optional="true" label="Key that will contain the marker overlap scores in 'uns'"/> | |
| 457 </when>--> | |
| 458 <when value="pp.log1p"/> | |
| 459 <when value="pp.scale"> | |
| 460 <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true" | |
| 461 label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/> | |
| 462 <param argument="max_value" type="float" value="" optional="true" label="Maximum value" | |
| 463 help="Clip (truncate) to this value after scaling. If not set, it does not clip."/> | |
| 464 </when> | |
| 465 <when value="pp.sqrt"/> | |
| 61 </conditional> | 466 </conditional> |
| 62 <expand macro="anndata_output_format"/> | |
| 63 </inputs> | 467 </inputs> |
| 64 <outputs> | 468 <outputs> |
| 65 <expand macro="anndata_outputs"/> | 469 <expand macro="anndata_outputs"/> |
| 66 <data name="obs" format="tabular" label="${tool.name} on ${on_string}: Observations annotation"> | |
| 67 <filter>method['method'] == 'tl.dpt'</filter> | |
| 68 </data> | |
| 69 </outputs> | 470 </outputs> |
| 70 <tests> | 471 <tests> |
| 71 <test> | 472 <test> |
| 72 <conditional name="input"> | 473 <!-- test 1 --> |
| 73 <param name="format" value="h5ad" /> | 474 <param name="adata" value="sparce_csr_matrix.h5ad" /> |
| 74 <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" /> | 475 <conditional name="method"> |
| 75 </conditional> | 476 <param name="method" value="pp.calculate_qc_metrics"/> |
| 76 <conditional name="method"> | 477 <param name="expr_type" value="counts"/> |
| 77 <param name="method" value="tl.paga"/> | 478 <param name="var_type" value="genes"/> |
| 78 <param name="groups" value="paul15_clusters"/> | 479 <param name="qc_vars" value="mito,negative"/> |
| 79 <param name="use_rna_velocity" value="False"/> | 480 <param name="percent_top" value=""/> |
| 80 <param name="model" value="v1.2"/> | 481 </conditional> |
| 81 </conditional> | 482 <assert_stdout> |
| 82 <param name="anndata_output_format" value="h5ad" /> | 483 <has_text_matching expression="sc.pp.calculate_qc_metrics" /> |
| 83 <assert_stdout> | 484 <has_text_matching expression="expr_type='counts'" /> |
| 84 <has_text_matching expression="sc.tl.paga"/> | 485 <has_text_matching expression="var_type='genes'" /> |
| 85 <has_text_matching expression="groups='paul15_clusters'"/> | 486 <has_text_matching expression="qc_vars=\['mito', 'negative'\]" /> |
| 86 <has_text_matching expression="use_rna_velocity =False"/> | 487 </assert_stdout> |
| 87 <has_text_matching expression="model='v1.2'"/> | 488 <output name="anndata_out" file="pp.calculate_qc_metrics.sparce_csr_matrix.h5ad" ftype="h5ad" compare="sim_size"/> |
| 88 </assert_stdout> | 489 </test> |
| 89 <output name="anndata_out_h5ad" file="tl.paga.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"> | 490 <test> |
| 491 <!-- test 2 --> | |
| 492 <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" /> | |
| 493 <conditional name="method"> | |
| 494 <param name="method" value="pp.neighbors"/> | |
| 495 <param name="n_neighbors" value="15"/> | |
| 496 <param name="knn" value="True"/> | |
| 497 <param name="random_state" value="0"/> | |
| 498 <param name="pp_neighbors_method" value="umap"/> | |
| 499 <param name="metric" value="euclidean"/> | |
| 500 </conditional> | |
| 501 <assert_stdout> | |
| 502 <has_text_matching expression="sc.pp.neighbors"/> | |
| 503 <has_text_matching expression="n_neighbors=15"/> | |
| 504 <has_text_matching expression="knn=True"/> | |
| 505 <has_text_matching expression="random_state=0"/> | |
| 506 <has_text_matching expression="method='umap'"/> | |
| 507 <has_text_matching expression="metric='euclidean'"/> | |
| 508 </assert_stdout> | |
| 509 <output name="anndata_out" file="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"> | |
| 90 <assert_contents> | 510 <assert_contents> |
| 91 <has_h5_keys keys="X, obs, obsm, uns, var" /> | 511 <has_h5_keys keys="X, obs, obsm, uns, var" /> |
| 92 </assert_contents> | 512 </assert_contents> |
| 93 </output> | 513 </output> |
| 94 </test> | 514 </test> |
| 95 <test> | 515 <test> |
| 96 <conditional name="input"> | 516 <!-- test 3 --> |
| 97 <param name="format" value="h5ad" /> | 517 <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" /> |
| 98 <param name="adata" value="tl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" /> | 518 <conditional name="method"> |
| 99 </conditional> | 519 <param name="method" value="pp.neighbors"/> |
| 100 <conditional name="method"> | 520 <param name="n_neighbors" value="15"/> |
| 101 <param name="method" value="tl.dpt"/> | 521 <param name="knn" value="True"/> |
| 102 <param name="n_dcs" value="15"/> | 522 <param name="pp_neighbors_method" value="gauss"/> |
| 103 <param name="n_branchings" value="1"/> | 523 <param name="metric" value="braycurtis"/> |
| 104 <param name="min_group_size" value="0.01"/> | 524 </conditional> |
| 105 <param name="allow_kendall_tau_shift" value="True"/> | 525 <assert_stdout> |
| 106 </conditional> | 526 <has_text_matching expression="sc.pp.neighbors"/> |
| 107 <param name="anndata_output_format" value="h5ad" /> | 527 <has_text_matching expression="n_neighbors=15"/> |
| 108 <assert_stdout> | 528 <has_text_matching expression="knn=True"/> |
| 109 <has_text_matching expression="sc.tl.dpt"/> | 529 <has_text_matching expression="random_state=0"/> |
| 110 <has_text_matching expression="n_dcs=15"/> | 530 <has_text_matching expression="method='gauss'"/> |
| 111 <has_text_matching expression="n_branchings=1"/> | 531 <has_text_matching expression="metric='braycurtis'"/> |
| 112 <has_text_matching expression="min_group_size=0.01"/> | 532 </assert_stdout> |
| 113 <has_text_matching expression="allow_kendall_tau_shift=True"/> | 533 <output name="anndata_out" file="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/> |
| 114 </assert_stdout> | 534 </test> |
| 115 <output name="anndata_out_h5ad" file="tl.dpt.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"> | 535 <test> |
| 536 <!-- test 4 --> | |
| 537 <param name="adata" value="krumsiek11.h5ad" /> | |
| 538 <conditional name="method"> | |
| 539 <param name="method" value="tl.score_genes"/> | |
| 540 <param name="gene_list" value="Gata2, Fog1"/> | |
| 541 <param name="ctrl_size" value="2"/> | |
| 542 <param name="n_bins" value="2"/> | |
| 543 <param name="random_state" value="2"/> | |
| 544 <param name="use_raw" value="False"/> | |
| 545 <param name="score_name" value="score"/> | |
| 546 </conditional> | |
| 547 <assert_stdout> | |
| 548 <has_text_matching expression="sc.tl.score_genes" /> | |
| 549 <has_text_matching expression="gene_list=\['Gata2', 'Fog1'\]" /> | |
| 550 <has_text_matching expression="ctrl_size=2" /> | |
| 551 <has_text_matching expression="score_name='score'" /> | |
| 552 <has_text_matching expression="n_bins=2" /> | |
| 553 <has_text_matching expression="random_state=2" /> | |
| 554 <has_text_matching expression="use_raw=False" /> | |
| 555 <has_text_matching expression="copy=False" /> | |
| 556 </assert_stdout> | |
| 557 <output name="anndata_out" file="tl.score_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 558 </test> | |
| 559 <test> | |
| 560 <!-- test 5 --> | |
| 561 <param name="adata" value="krumsiek11.h5ad" /> | |
| 562 <conditional name="method"> | |
| 563 <param name="method" value="tl.score_genes_cell_cycle"/> | |
| 564 <conditional name='s_genes'> | |
| 565 <param name="format" value="text"/> | |
| 566 <param name="text" value="Gata2, Fog1, EgrNab"/> | |
| 567 </conditional> | |
| 568 <conditional name='g2m_genes'> | |
| 569 <param name="format" value="text"/> | |
| 570 <param name="text" value="Gata2, Fog1, EgrNab"/> | |
| 571 </conditional> | |
| 572 <param name="n_bins" value="2"/> | |
| 573 <param name="random_state" value="1"/> | |
| 574 <param name="use_raw" value="False"/> | |
| 575 </conditional> | |
| 576 <assert_stdout> | |
| 577 <has_text_matching expression="sc.tl.score_genes_cell_cycle"/> | |
| 578 <has_text_matching expression="s_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/> | |
| 579 <has_text_matching expression="g2m_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/> | |
| 580 <has_text_matching expression="n_bins=2"/> | |
| 581 <has_text_matching expression="random_state=1"/> | |
| 582 <has_text_matching expression="use_raw=False"/> | |
| 583 </assert_stdout> | |
| 584 <output name="anndata_out" file="tl.score_genes_cell_cycle.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 585 </test> | |
| 586 <test> | |
| 587 <!-- test 6 --> | |
| 588 <param name="adata" value="krumsiek11.h5ad" /> | |
| 589 <conditional name="method"> | |
| 590 <param name="method" value="tl.rank_genes_groups"/> | |
| 591 <param name="groupby" value="cell_type"/> | |
| 592 <param name="use_raw" value="True"/> | |
| 593 <conditional name="ref"> | |
| 594 <param name="rest" value="rest"/> | |
| 595 </conditional> | |
| 596 <param name="n_genes" value="100"/> | |
| 597 <conditional name="tl_rank_genes_groups_method"> | |
| 598 <param name="method" value="t-test_overestim_var"/> | |
| 599 <param name="corr_method" value="benjamini-hochberg"/> | |
| 600 </conditional> | |
| 601 <param name="only_positive" value="true"/> | |
| 602 </conditional> | |
| 603 <assert_stdout> | |
| 604 <has_text_matching expression="sc.tl.rank_genes_groups"/> | |
| 605 <has_text_matching expression="groupby='cell_type'"/> | |
| 606 <has_text_matching expression="use_raw=True"/> | |
| 607 <has_text_matching expression="reference='rest'"/> | |
| 608 <has_text_matching expression="n_genes=100"/> | |
| 609 <has_text_matching expression="method='t-test_overestim_var'"/> | |
| 610 <has_text_matching expression="corr_method='benjamini-hochberg'"/> | |
| 611 <has_text_matching expression="only_positive=True"/> | |
| 612 </assert_stdout> | |
| 613 <output name="anndata_out" file="tl.rank_genes_groups.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 614 </test> | |
| 615 <test> | |
| 616 <!-- test 7 --> | |
| 617 <param name="adata" value="pbmc68k_reduced.h5ad" /> | |
| 618 <conditional name="method"> | |
| 619 <param name="method" value="tl.rank_genes_groups"/> | |
| 620 <param name="groupby" value="louvain"/> | |
| 621 <param name="use_raw" value="True"/> | |
| 622 <conditional name="ref"> | |
| 623 <param name="rest" value="rest"/> | |
| 624 </conditional> | |
| 625 <param name="n_genes" value="100"/> | |
| 626 <conditional name="tl_rank_genes_groups_method"> | |
| 627 <param name="method" value="logreg"/> | |
| 628 <conditional name="solver"> | |
| 629 <param name="solver" value="newton-cg"/> | |
| 630 <param name="fit_intercept" value="True"/> | |
| 631 <param name="max_iter" value="100"/> | |
| 632 <param name="multi_class" value="auto"/> | |
| 633 </conditional> | |
| 634 <param name="tol" value="1e-4"/> | |
| 635 <param name="c" value="1.0"/> | |
| 636 </conditional> | |
| 637 <param name="only_positive" value="true"/> | |
| 638 </conditional> | |
| 639 <assert_stdout> | |
| 640 <has_text_matching expression="sc.tl.rank_genes_groups"/> | |
| 641 <has_text_matching expression="groupby='louvain'"/> | |
| 642 <has_text_matching expression="use_raw=True"/> | |
| 643 <has_text_matching expression="reference='rest'"/> | |
| 644 <has_text_matching expression="n_genes=100"/> | |
| 645 <has_text_matching expression="method='logreg'"/> | |
| 646 <has_text_matching expression="solver='newton-cg'"/> | |
| 647 <has_text_matching expression="penalty='l2'"/> | |
| 648 <has_text_matching expression="fit_intercept=True"/> | |
| 649 <has_text_matching expression="max_iter=100"/> | |
| 650 <has_text_matching expression="multi_class='auto'"/> | |
| 651 <has_text_matching expression="tol=0.0001"/> | |
| 652 <has_text_matching expression="C=1.0"/> | |
| 653 <has_text_matching expression="only_positive=True"/> | |
| 654 </assert_stdout> | |
| 655 <output name="anndata_out" file="tl.rank_genes_groups.newton-cg.pbmc68k_reduced.h5ad" ftype="h5ad" compare="sim_size"> | |
| 116 <assert_contents> | 656 <assert_contents> |
| 117 <has_h5_keys keys="X, obs, obsm, uns, var" /> | 657 <has_h5_keys keys="X, obs, obsm, raw.X, raw.var, uns, var" /> |
| 118 </assert_contents> | 658 </assert_contents> |
| 119 </output> | 659 </output> |
| 120 <output name="obs" file="tl.dpt.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.obs.tabular" compare="sim_size"/> | 660 </test> |
| 661 <test> | |
| 662 <!-- test 8 --> | |
| 663 <param name="adata" value="pbmc68k_reduced.h5ad" /> | |
| 664 <conditional name="method"> | |
| 665 <param name="method" value="tl.rank_genes_groups"/> | |
| 666 <param name="groupby" value="louvain"/> | |
| 667 <param name="use_raw" value="True"/> | |
| 668 <conditional name="ref"> | |
| 669 <param name="rest" value="rest"/> | |
| 670 </conditional> | |
| 671 <param name="n_genes" value="100"/> | |
| 672 <conditional name="tl_rank_genes_groups_method"> | |
| 673 <param name="method" value="logreg"/> | |
| 674 <conditional name="solver"> | |
| 675 <param name="solver" value="liblinear"/> | |
| 676 <conditional name="penalty"> | |
| 677 <param name="penalty" value="l2"/> | |
| 678 <param name="dual" value="False"/> | |
| 679 <conditional name="intercept_scaling"> | |
| 680 <param name="fit_intercept" value="True"/> | |
| 681 <param name="intercept_scaling" value="1.0" /> | |
| 682 </conditional> | |
| 683 <param name="random_state" value="1"/> | |
| 684 </conditional> | |
| 685 </conditional> | |
| 686 <param name="tol" value="1e-4"/> | |
| 687 <param name="c" value="1.0"/> | |
| 688 </conditional> | |
| 689 <param name="only_positive" value="true"/> | |
| 690 </conditional> | |
| 691 <assert_stdout> | |
| 692 <has_text_matching expression="sc.tl.rank_genes_groups"/> | |
| 693 <has_text_matching expression="groupby='louvain'"/> | |
| 694 <has_text_matching expression="use_raw=True"/> | |
| 695 <has_text_matching expression="reference='rest'"/> | |
| 696 <has_text_matching expression="n_genes=100"/> | |
| 697 <has_text_matching expression="method='logreg'"/> | |
| 698 <has_text_matching expression="solver='liblinear'"/> | |
| 699 <has_text_matching expression="penalty='l2'"/> | |
| 700 <has_text_matching expression="dual=False"/> | |
| 701 <has_text_matching expression="fit_intercept=True"/> | |
| 702 <has_text_matching expression="intercept_scaling=1.0"/> | |
| 703 <has_text_matching expression="tol=0.0001"/> | |
| 704 <has_text_matching expression="C=1.0"/> | |
| 705 <has_text_matching expression="only_positive=True"/> | |
| 706 </assert_stdout> | |
| 707 <output name="anndata_out" file="tl.rank_genes_groups.liblinear.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"> | |
| 708 <assert_contents> | |
| 709 <has_h5_keys keys="X, obs, obsm, raw.X, raw.var, uns, var" /> | |
| 710 </assert_contents> | |
| 711 </output> | |
| 712 </test> | |
| 713 <!--<test> | |
| 714 < test 9 > | |
| 715 <param name="adata" value="tl.rank_genes_groups.louvain.neighbors.pca.pbmc68k_reduced.h5ad" /> | |
| 716 <conditional name="method"> | |
| 717 <param name="method" value="tl.marker_gene_overlap"/> | |
| 718 <repeat name="reference_markers"> | |
| 719 <param name="key" value="CD4 T cells"/> | |
| 720 <param name="value" value="IL7R"/> | |
| 721 </repeat> | |
| 722 <repeat name="reference_markers"> | |
| 723 <param name="key" value="CD14+ Monocytes"/> | |
| 724 <param name="value" value="CD14,LYZ"/> | |
| 725 </repeat> | |
| 726 <repeat name="reference_markers"> | |
| 727 <param name="key" value="B cells"/> | |
| 728 <param name="value" value="MS4A1"/> | |
| 729 </repeat> | |
| 730 <conditional name="overlap"> | |
| 731 <param argument="method" value="overlap_count"/> | |
| 732 <param argument="normalize" value="None"/> | |
| 733 </conditional> | |
| 734 </conditional> | |
| 735 <assert_stdout> | |
| 736 <has_text_matching expression="tl.marker_gene_overlap"/> | |
| 737 <has_text_matching expression="key='rank_genes_groups'"/> | |
| 738 <has_text_matching expression="method='overlap_count'"/> | |
| 739 </assert_stdout> | |
| 740 <output name="anndata_out" file="pp.log1p.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 741 </test>--> | |
| 742 <test> | |
| 743 <!-- test 9 --> | |
| 744 <param name="adata" value="krumsiek11.h5ad" /> | |
| 745 <conditional name="method"> | |
| 746 <param name="method" value="pp.log1p"/> | |
| 747 </conditional> | |
| 748 <assert_stdout> | |
| 749 <has_text_matching expression="sc.pp.log1p"/> | |
| 750 </assert_stdout> | |
| 751 <output name="anndata_out" file="pp.log1p.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 752 </test> | |
| 753 <test> | |
| 754 <!-- test 10 --> | |
| 755 <param name="adata" value="krumsiek11.h5ad" /> | |
| 756 <conditional name="method"> | |
| 757 <param name="method" value="pp.scale"/> | |
| 758 <param name="zero_center" value="true"/> | |
| 759 </conditional> | |
| 760 <assert_stdout> | |
| 761 <has_text_matching expression="sc.pp.scale"/> | |
| 762 <has_text_matching expression="zero_center=True"/> | |
| 763 </assert_stdout> | |
| 764 <output name="anndata_out" file="pp.scale.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 765 </test> | |
| 766 <test> | |
| 767 <!-- test 11 --> | |
| 768 <param name="adata" value="krumsiek11.h5ad" /> | |
| 769 <conditional name="method"> | |
| 770 <param name="method" value="pp.scale"/> | |
| 771 <param name="zero_center" value="true"/> | |
| 772 <param name="max_value" value="10"/> | |
| 773 </conditional> | |
| 774 <assert_stdout> | |
| 775 <has_text_matching expression="sc.pp.scale"/> | |
| 776 <has_text_matching expression="zero_center=True"/> | |
| 777 <has_text_matching expression="max_value=10.0"/> | |
| 778 </assert_stdout> | |
| 779 <output name="anndata_out" file="pp.scale_max_value.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 780 </test> | |
| 781 <test> | |
| 782 <!-- test 12 --> | |
| 783 <param name="adata" value="krumsiek11.h5ad" /> | |
| 784 <conditional name="method"> | |
| 785 <param name="method" value="pp.sqrt"/> | |
| 786 </conditional> | |
| 787 <assert_stdout> | |
| 788 <has_text_matching expression="sc.pp.sqrt"/> | |
| 789 </assert_stdout> | |
| 790 <output name="anndata_out" file="pp.sqrt.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
| 121 </test> | 791 </test> |
| 122 </tests> | 792 </tests> |
| 123 <help><![CDATA[ | 793 <help><![CDATA[ |
| 124 Generate cellular maps of differentiation manifolds with complex topologies (`tl.paga`) | 794 Calculate quality control metrics, using `pp.calculate_qc_metrics` |
| 125 ======================================================================================= | 795 =================================================================== |
| 126 | 796 |
| 127 By quantifying the connectivity of partitions (groups, clusters) of the | 797 Calculates a number of qc metrics for an AnnData object, largely based on calculateQCMetrics from scater. |
| 128 single-cell graph, partition-based graph abstraction (PAGA) generates a much | 798 Currently is most efficient on a sparse CSR or dense matrix. |
| 129 simpler abstracted graph (*PAGA graph*) of partitions, in which edge weights | 799 |
| 130 represent confidence in the presence of connections. By thresholding this | 800 It updates the observation level metrics: |
| 131 confidence in `paga`, a much simpler representation of data | 801 |
| 132 can be obtained. | 802 - total_{var_type}_by_{expr_type} (e.g. "total_genes_by_counts", number of genes with positive counts in a cell) |
| 133 | 803 - total_{expr_type} (e.g. "total_counts", total number of counts for a cell) |
| 134 The confidence can be interpreted as the ratio of the actual versus the | 804 - pct_{expr_type}_in_top_{n}_{var_type} (e.g. "pct_counts_in_top_50_genes", cumulative percentage of counts for 50 most expressed genes in a cell) |
| 135 expected value of connections under the null model of randomly connecting | 805 - total_{expr_type}_{qc_var} (e.g. "total_counts_mito", total number of counts for variables in qc_vars) |
| 136 partitions. We do not provide a p-value as this null model does not | 806 - pct_{expr_type}_{qc_var} (e.g. "pct_counts_mito", proportion of total counts for a cell which are mitochondrial) |
| 137 precisely capture what one would consider "connected" in real data, hence it | 807 |
| 138 strongly overestimates the expected value. See an extensive discussion of | 808 And also the variable level metrics: |
| 139 this in Wolf et al (2017). | 809 |
| 140 | 810 - total_{expr_type} (e.g. "total_counts", sum of counts for a gene) |
| 141 Together with a random walk-based distance measure, this generates a partial | 811 - mean_{expr_type} (e.g. "mean_counts", mean expression over all cells) |
| 142 coordinatization of data useful for exploring and explaining its variation. | 812 - n_cells_by_{expr_type} (e.g. "n_cells_by_counts", number of cells this expression is measured in) |
| 143 | 813 - pct_dropout_by_{expr_type} (e.g. "pct_dropout_by_counts", percentage of cells this feature does not appear in) |
| 144 More details on the `tl.paga scanpy documentation | 814 |
| 145 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.paga.html#scanpy.api.tl.paga>`_ | 815 More details on the `scanpy documentation |
| 146 | 816 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.calculate_qc_metrics.html>`__ |
| 147 | 817 |
| 148 Infer progression of cells through geodesic distance along the graph (`tl.dpt`) | 818 Compute a neighborhood graph of observations, using `pp.neighbors` |
| 149 =============================================================================== | 819 ================================================================== |
| 150 | 820 |
| 151 Reconstruct the progression of a biological process from snapshot | 821 The neighbor search efficiency of this heavily relies on UMAP (McInnes et al, 2018), |
| 152 data. `Diffusion Pseudotime` has been introduced by Haghverdi et al (2016) and | 822 which also provides a method for estimating connectivities of data points - |
| 153 implemented within Scanpy (Wolf et al, 2017). Here, we use a further developed | 823 the connectivity of the manifold (`method=='umap'`). If `method=='diffmap'`, |
| 154 version, which is able to deal with disconnected graphs (Wolf et al, 2017) and can | 824 connectivities are computed according to Coifman et al (2005), in the adaption of |
| 155 be run in a `hierarchical` mode by setting the parameter | 825 Haghverdi et al (2016). |
| 156 `n_branchings>1`. We recommend, however, to only use | 826 |
| 157 `tl.dpt` for computing pseudotime (`n_branchings=0`) and | 827 The returned AnnData object contains: |
| 158 to detect branchings via `paga`. For pseudotime, you need | 828 |
| 159 to annotate your data with a root cell. | 829 - Weighted adjacency matrix of the neighborhood graph of data points (connectivities). Weights should be interpreted as connectivities. |
| 160 | 830 - Distances for each pair of neighbors (distances) |
| 161 This requires to run `pp.neighbors`, first. In order to | 831 |
| 162 reproduce the original implementation of DPT, use `method=='gauss'` in | 832 These data are stored in the unstructured annotation (uns) and can be accessed using the inspect tool for AnnData objects |
| 163 this. Using the default `method=='umap'` only leads to minor quantitative | 833 |
| 164 differences, though. | 834 More details on the `scanpy documentation |
| 165 | 835 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.neighbors.html>`__ |
| 166 | 836 |
| 167 If `n_branchings==0`, no field `dpt_groups` will be written. | 837 Score a set of genes, using `tl.score_genes` |
| 168 | 838 ============================================ |
| 169 - dpt_pseudotime : Array of dim (number of samples) that stores the pseudotime of each cell, that is, the DPT distance with respect to the root cell. | 839 |
| 170 - dpt_groups : Array of dim (number of samples) that stores the subgroup id ('0','1', ...) for each cell. The groups typically correspond to 'progenitor cells', 'undecided cells' or 'branches' of a process. | 840 The score is the average expression of a set of genes subtracted with the |
| 171 | 841 average expression of a reference set of genes. The reference set is |
| 172 The tool is similar to the R package `destiny` of Angerer et al (2016). | 842 randomly sampled from the `gene_pool` for each binned expression value. |
| 173 | 843 |
| 174 More details on the `tl.dpt scanpy documentation | 844 This reproduces the approach in Seurat (Satija et al, 2015) and has been implemented |
| 175 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.dpt.html#scanpy.api.tl.dpt>`_ | 845 for Scanpy by Davide Cittaro. |
| 176 | 846 |
| 847 More details on the `scanpy documentation | |
| 848 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.score_genes.html>`__ | |
| 849 | |
| 850 Score cell cycle genes, using `tl.score_genes_cell_cycle` | |
| 851 ========================================================= | |
| 852 | |
| 853 Given two lists of genes associated to S phase and G2M phase, calculates | |
| 854 scores and assigns a cell cycle phase (G1, S or G2M). See | |
| 855 `score_genes` for more explanation. | |
| 856 | |
| 857 More details on the `scanpy documentation | |
| 858 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.score_genes_cell_cycle.html>`__ | |
| 859 | |
| 860 Rank genes for characterizing groups, using `tl.rank_genes_groups` | |
| 861 ================================================================== | |
| 862 | |
| 863 The returned AnnData object contains: | |
| 864 | |
| 865 - Gene names, ordered according to scores | |
| 866 - Z-score underlying the computation of a p-value for each gene for each group, ordered according to scores | |
| 867 - Log2 fold change for each gene for each group, ordered according to scores. It is only provided if method is ‘t-test’ like. This is an approximation calculated from mean-log values. | |
| 868 - P-values | |
| 869 - Adjusted p-values | |
| 870 | |
| 871 These data are stored in the unstructured annotation (uns) and can be accessed using the inspect tool for AnnData objects | |
| 872 | |
| 873 More details on the `scanpy documentation | |
| 874 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.rank_genes_groups.html>`__ | |
| 875 | |
| 876 | |
| 877 Calculate an overlap score between data-derived marker genes and provided markers (`tl.marker_gene_overlap`) | |
| 878 ============================================================================================================ | |
| 879 | |
| 880 Marker gene overlap scores can be quoted as overlap counts, overlap coefficients, or jaccard indices. The method returns a pandas dataframe which can be used to annotate clusters based on marker gene overlaps. | |
| 881 | |
| 882 | |
| 883 Logarithmize the data matrix (`pp.log1p`) | |
| 884 ========================================= | |
| 885 | |
| 886 More details on the `scanpy documentation | |
| 887 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.log1p.html>`__ | |
| 888 | |
| 889 Scale data to unit variance and zero mean (`pp.scale`) | |
| 890 ====================================================== | |
| 891 | |
| 892 More details on the `scanpy documentation | |
| 893 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.scale.html>`__ | |
| 894 | |
| 895 Computes the square root of the data matrix (`pp.sqrt`) | |
| 896 ======================================================= | |
| 897 | |
| 898 `X = sqrt(X)` | |
| 177 ]]></help> | 899 ]]></help> |
| 178 <expand macro="citations"/> | 900 <expand macro="citations"/> |
| 179 </tool> | 901 </tool> |
