comparison cluster_embed.xml @ 0:43711b22f28b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/episcanpy/ commit ce8ee43d7285503a24c7b0f55c09c513be8c66f5
author iuc
date Tue, 18 Apr 2023 13:18:25 +0000
parents
children 51dbb534fbce
comparison
equal deleted inserted replaced
-1:000000000000 0:43711b22f28b
1 <tool id="episcanpy_cluster_embed" name="Cluster, embed and annotate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>with EpiScanpy</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="bio_tools"/>
7 <expand macro="requirements"/>
8 <expand macro="version_command"/>
9 <command detect_errors="exit_code"><![CDATA[
10 @CMD@
11 ]]></command>
12 <configfiles> <!-- Script template: expands the @CMD_imports@ and @CMD_read_inputs@ tokens, calls the episcanpy function selected by $method.method, then writes the result to anndata.h5ad. NOTE(review): text parameters are interpolated inside quotes, so a default such as key_added "None" reaches Python as the string 'None', not the None object; confirm that is intended. -->
13 <configfile name="script_file"><![CDATA[
14 @CMD_imports@
15 @CMD_read_inputs@
16 import episcanpy as esc
17 #if $method.method == 'pp.lazy'
18 esc.pp.lazy(
19 adata,
20 pp_pca=$method.lazy_pp_pca,
21 svd_solver='$method.lazy_svd_solver',
22 nb_pcs=$method.lazy_nb_pcs,
23 n_neighbors=$method.lazy_n_neighbors,
24 perplexity=$method.lazy_perplexity,
25 method='$method.lazy_method',
26 metric='$method.lazy_metric',
27 min_dist=$method.lazy_min_dist,
28 spread=$method.lazy_spread,
29 use_highly_variable=$method.lazy_use_highly_variable,
30 n_components=$method.lazy_n_components,
31 )
32
33 #else if $method.method == 'tl.rank_features'
34 esc.tl.rank_features(
35 adata,
36 omic='ATAC',
37 groupby='$method.rank_features_groupby',
38 use_raw=$method.rank_features_use_raw,
39 groups='$method.rank_features_groups',
40 reference='$method.rank_features_reference',
41 n_features=$method.rank_features_n_features,
42 rankby_abs=$method.rank_features_rankby_abs,
43 key_added='$method.rank_features_key_added',
44 copy=False,
45 method='$method.rank_features_method',
46 corr_method='$method.rank_features_corr_method'
47 )
48
49 #else if $method.method == 'tl.find_genes'
50 esc.tl.find_genes(
51 adata,
52 gtf_file='$method.find_genes_gtf_file',
53 key_added='$method.find_genes_key_added',
54 upstream=$method.find_genes_upstream,
55 feature_type='$method.find_genes_feature_type',
56 annotation='$method.find_genes_annotation',
57 raw=$method.find_genes_raw)
58
59 #else if $method.method == 'tl.get_n_clusters'
60 esc.tl.getNClusters(
61 adata,
62 n_cluster=$method.get_n_clusters_n_cluster,
63 range_min=$method.get_n_clusters_range_min,
64 range_max=$method.get_n_clusters_range_max,
65 max_steps=$method.get_n_clusters_max_steps,
66 method='$method.get_n_clusters_method',
67 key_added='$method.get_n_clusters_key_added'
68 )
69
70 #else if $method.method == 'tl.kmeans'
71 esc.tl.kmeans(
72 adata,
73 num_clusters=$method.kmeans_num_clusters
74 )
75
76 #else if $method.method == 'tl.hc'
77 esc.tl.hc(
78 adata,
79 num_clusters=$method.hc_num_clusters
80 )
81 #end if
82 adata.write('anndata.h5ad')
83 ]]></configfile>
84 </configfiles>
85 <inputs>
86 <expand macro="inputs_anndata"/>
87 <conditional name="method">
88 <param argument="method" type="select" label="Method used for Clustering or Embedding"> <!-- each option value must match a <when> branch below and an #if test in the script template; labels name the episcanpy function that is actually called -->
89 <option value="pp.lazy">Embedding: Automatically compute PCA coordinates, loadings and variance decomposition, a neighborhood graph of observations, t-distributed stochastic neighborhood embedding (tSNE) and Uniform Manifold Approximation and Projection (UMAP), using 'pp.lazy'</option>
90 <option value="tl.rank_features">Rank features for characterizing groups, using 'tl.rank_features'</option>
91 <option value="tl.find_genes">Annotation: Find genes and add annotations, using 'tl.find_genes'</option>
92 <option value="tl.get_n_clusters">Clustering: Test different settings of louvain to obtain the target number of clusters, using 'tl.getNClusters'</option>
93 <option value="tl.kmeans">Clustering: Compute kmeans clustering using X_pca fits, using 'tl.kmeans'</option>
94 <option value="tl.hc">Clustering: Compute hierarchical clustering using X_pca fits, using 'tl.hc'</option>
95 </param>
96 <when value="pp.lazy"> <!-- options substituted into the esc.pp.lazy(...) call; select values are passed verbatim, so metric names must use the library spelling (e.g. "russellrao") -->
97 <param name="lazy_pp_pca" value="True" type="select" label="Compute PCA coordinates before the neighborhood graph" help="(pp_pca)">
98 <option value="True" selected="true">True</option>
99 <option value="False">False</option>
100 </param>
101 <param name="lazy_svd_solver" type="select" label="SVD solver to use" help="(svd_solver)">
102 <option value="arpack" selected="true">arpack (for the ARPACK wrapper in SciPy)</option>
103 <option value="randomized">randomized (for the randomized algorithm due to Halko (2009))</option>
104 <option value="auto">auto (chooses automatically depending on the size of the problem)</option>
105 <option value="lobpcg">lobpcg (an alternative SciPy solver)</option>
106 </param>
107 <param name="lazy_nb_pcs" value="50" min="0" type="integer" label="Number of principal component computed for PCA (and therefore neighbors, tsne and umap)" help="(nb_pcs)"/>
108 <param name="lazy_n_neighbors" value="15" min="0" type="integer" label="Size of the local neighborhood (number of neighboring data points) used for manifold approximation" help="(n_neighbors)"/>
109 <param name="lazy_perplexity" value="30" min="0" type="integer" label="Perplexity (number of nearest neighbors used in other manifold learning algorithms)" help="(perplexity)"/>
110 <param name="lazy_method" type="select" label="Kernel to use for computing connectivities" help="(method)">
111 <option value="umap" selected="true">umap</option>
112 <option value="gauss">gauss</option>
113 </param>
114 <param name="lazy_metric" type="select" label="Metric that returns a distance" help="(metric)">
115 <option value="euclidean" selected="true">euclidean</option>
116 <option value="cityblock">cityblock</option>
117 <option value="cosine">cosine</option>
118 <option value="l1">l1</option>
119 <option value="l2">l2</option>
120 <option value="manhattan">manhattan</option>
121 <option value="braycurtis">braycurtis</option>
122 <option value="canberra">canberra</option>
123 <option value="chebyshev">chebyshev</option>
124 <option value="correlation">correlation</option>
125 <option value="dice">dice</option>
126 <option value="hamming">hamming</option>
127 <option value="jaccard">jaccard</option>
128 <option value="kulsinski">kulsinski</option>
129 <option value="mahalanobis">mahalanobis</option>
130 <option value="minkowski">minkowski</option>
131 <option value="rogerstanimoto">rogerstanimoto</option>
132 <option value="russellrao">russellrao</option>
133 <option value="seuclidean">seuclidean</option>
134 <option value="sokalmichener">sokalmichener</option>
135 <option value="sokalsneath">sokalsneath</option>
136 <option value="sqeuclidean">sqeuclidean</option>
137 <option value="yule">yule</option>
138 </param>
139 <param name="lazy_min_dist" value="0.5" min="0" type="float" label="The effective minimum distance between embedded points" help="(min_dist)"/>
140 <param name="lazy_spread" value="1.0" type="float" label="The effective scale of embedded points" help="(spread)"/>
141 <param name="lazy_use_highly_variable" type="select" label="Use highly variable genes only" help="(use_highly_variable)">
142 <option value="True">True</option>
143 <option value="False" selected="true">False</option>
144 </param>
145 <param name="lazy_n_components" value="2" min="0" type="integer" label="The number of dimensions of the UMAP embedding" help="(n_components)"/>
146 </when>
147 <when value="tl.rank_features"> <!-- values below are substituted into the esc.tl.rank_features(...) call of the script template; free-text inputs are restricted by a sanitizer plus a regex validator -->
148 <param name="rank_features_groupby" type="text" value="louvain" label="The key of the observations grouping to consider" help="(groupby)">
149 <sanitizer invalid_char="">
150 <valid initial="string.letters,string.digits">
151 <add value="_"/>
152 <add value="-"/>
153 <add value=" "/>
154 <add value="."/>
155 </valid>
156 </sanitizer>
157 <validator type="regex">[0-9a-zA-Z_. -]+</validator>
158 </param>
159 <param name="rank_features_use_raw" type="select" label="Use raw attribute of Anndata if present" help="(use_raw)">
160 <option value="True" selected="true">True</option>
161 <option value="False">False</option>
162 </param>
163 <param name="rank_features_groups" type="text" value="all" label="Subset of groups, to which comparison shall be restricted" help="(groups)">
164 <sanitizer invalid_char="">
165 <valid initial="string.letters,string.digits">
166 <add value="_"/>
167 <add value="-"/>
168 <add value="."/>
169 <add value=" "/>
170 <add value=","/>
171 </valid>
172 </sanitizer>
173 <validator type="regex">[0-9a-zA-Z_., -]+</validator>
174 </param>
175 <param name="rank_features_reference" type="text" value="rest" label="Compare each group with respect to this group" help="(reference)">
176 <sanitizer invalid_char="">
177 <valid initial="string.letters,string.digits">
178 <add value="_"/>
179 <add value="-"/>
180 <add value="."/>
181 <add value=" "/>
182 <add value=","/>
183 </valid>
184 </sanitizer>
185 <validator type="regex">[0-9a-zA-Z_., -]+</validator>
186 </param>
187 <param name="rank_features_n_features" type="integer" value="100" min="1" label="The number of features that appear in the returned tables" help="(n_features)"/>
188 <param name="rank_features_rankby_abs" type="select" label="Rank genes by the absolute value of the score, not by the score" help="(rankby_abs)">
189 <option value="True">True</option>
190 <option value="False" selected="true">False</option>
191 </param>
192 <param name="rank_features_key_added" type="text" value="rank_features_groups" label="The key in adata.uns information is saved to" help="(key_added)">
193 <sanitizer invalid_char="">
194 <valid initial="string.letters,string.digits">
195 <add value="_"/>
196 <add value="-"/>
197 <add value="."/>
198 <add value=" "/>
199 <add value=","/>
200 </valid>
201 </sanitizer>
202 <validator type="regex">[0-9a-zA-Z_., -]+</validator>
203 </param>
204 <param name="rank_features_method" type="select" label="Method to use" help="(method)">
205 <option value="" selected="true">Auto select for ATAC</option>
206 <option value="logreg">Logistic regression</option>
207 <option value="t-test">t-test</option>
208 <option value="t-test_overestim_var">t-test_overestim_var</option>
209 <option value="wilcoxon">Wilcoxon rank sum</option>
210 </param>
211 <param name="rank_features_corr_method" type="select" value="benjamini-hochberg" label="p-value correction method" help="(corr_method)">
212 <option value="benjamini-hochberg">Benjamini Hochberg</option>
213 <option value="bonferroni">Bonferroni</option>
214 </param>
215 </when>
216 <when value="tl.find_genes"> <!-- values below are substituted into the esc.tl.find_genes(...) call of the script template -->
217 <param name="find_genes_gtf_file" type="data" format="gtf" label="Annotation GTF file" help="(gtf_file)"/>
218 <param name="find_genes_key_added" type="text" value="transcript_annotation" label="Key added" help="(key_added)">
219 <sanitizer invalid_char="">
220 <valid initial="string.letters,string.digits">
221 <add value="_"/>
222 <add value="-"/>
223 <add value="."/>
224 <add value=" "/>
225 <add value=","/>
226 </valid>
227 </sanitizer>
228 <validator type="regex">[0-9a-zA-Z_., -]+</validator>
229 </param>
230 <param name="find_genes_upstream" type="integer" value="2000" min="0" label="Upstream" help="(upstream)"/>
231 <param name="find_genes_feature_type" type="text" value="transcript" label="Feature type" help="(feature_type)">
232 <sanitizer invalid_char="">
233 <valid initial="string.letters,string.digits">
234 <add value="_"/>
235 <add value="-"/>
236 <add value="."/>
237 <add value=" "/>
238 <add value=","/>
239 </valid>
240 </sanitizer>
241 <validator type="regex">[0-9a-zA-Z_., -]+</validator>
242 </param>
243 <param name="find_genes_annotation" type="text" value="HAVANA" label="Annotation" help="(annotation)">
244 <sanitizer invalid_char="">
245 <valid initial="string.letters,string.digits">
246 <add value="_"/>
247 <add value="-"/>
248 <add value="."/>
249 <add value=" "/>
250 <add value=","/>
251 </valid>
252 </sanitizer>
253 <validator type="regex">[0-9a-zA-Z_., -]+</validator>
254 </param>
255 <param name="find_genes_raw" type="select" label="Raw?" help="(raw)">
256 <option value="True">True</option>
257 <option value="False" selected="true">False</option>
258 </param>
259 </when>
260 <when value="tl.get_n_clusters"> <!-- values below are substituted into the esc.tl.getNClusters(...) call of the script template -->
261 <param name="get_n_clusters_n_cluster" type="integer" value="14" min="1" label="Number of clusters" help="(n_cluster)"/>
262 <param name="get_n_clusters_method" type="select" label="Clustering method to use" help="(method)">
263 <option value="leiden" selected="true">leiden</option>
264 <option value="louvain">louvain</option>
265 </param>
266 <param name="get_n_clusters_range_min" type="integer" value="0" min="0" label="Range minimum" help="(range_min)"/>
267 <param name="get_n_clusters_range_max" type="integer" value="3" min="1" label="Range maximum" help="(range_max)"/>
268 <param name="get_n_clusters_max_steps" type="integer" value="20" min="1" label="Maximum number of steps" help="(max_steps)"/>
269 <param name="get_n_clusters_key_added" type="text" value="None" label="Variable name in obs" help="(key_added)">
270 <sanitizer invalid_char="">
271 <valid initial="string.letters,string.digits">
272 <add value="_"/>
273 <add value="-"/>
274 <add value="."/>
275 <add value=" "/>
276 <add value=","/>
277 </valid>
278 </sanitizer>
279 <validator type="regex">[0-9a-zA-Z_., -]+</validator>
280 </param>
281 </when>
282 <when value="tl.kmeans"> <!-- single option, substituted into esc.tl.kmeans(adata, num_clusters=...) -->
283 <param name="kmeans_num_clusters" type="integer" value="14" min="1" label="Number of clusters" help="(num_clusters)"/>
284 </when>
285 <when value="tl.hc"> <!-- single option, substituted into esc.tl.hc(adata, num_clusters=...) -->
286 <param name="hc_num_clusters" type="integer" value="14" min="1" label="Number of clusters" help="(num_clusters)"/>
287 </when>
288 </conditional>
289 <expand macro="inputs_common_advanced"/>
290 </inputs>
291 <outputs>
292 <expand macro="anndata_outputs"/>
293 </outputs>
294 <tests>
295 <test expect_num_outputs="2">
296 <!-- test 0: pp.lazy with nb_pcs=5; every param name must match a lazy_* input of the "pp.lazy" branch, and the rendered script is checked for the esc.pp.lazy call -->
297 <param name="adata" value="krumsiek11.h5ad" />
298 <conditional name="method">
299 <param name="method" value="pp.lazy"/>
300 <param name="lazy_pp_pca" value="True"/>
301 <param name="lazy_svd_solver" value="arpack"/>
302 <param name="lazy_nb_pcs" value="5"/>
303 <param name="lazy_n_neighbors" value="15"/>
304 <param name="lazy_perplexity" value="30"/>
305 <param name="lazy_method" value="umap"/>
306 <param name="lazy_metric" value="euclidean"/>
307 <param name="lazy_min_dist" value="0.5"/>
308 <param name="lazy_spread" value="1.0"/>
309 <param name="lazy_use_highly_variable" value="False"/>
310 <param name="lazy_n_components" value="2"/>
311 </conditional>
312 <section name="advanced_common">
313 <param name="show_log" value="true" />
314 </section>
315 <output name="hidden_output">
316 <assert_contents>
317 <has_text_matching expression="esc.pp.lazy"/>
318 <has_text_matching expression="adata"/>
319 <has_text_matching expression="nb_pcs=5"/>
320 </assert_contents>
321 </output>
322 <output name="anndata_out" file="krumsiek11.pp.lazy.h5ad" ftype="h5ad" compare="sim_size"/>
323 </test>
324 <test expect_num_outputs="2">
325 <!-- test 1: tl.rank_features grouped by 'louvain' with use_raw=False; checks that the output h5ad contains the obs, uns, obsm, varm and obsp keys -->
326 <param name="adata" value="krumsiek11.pp.lazy.tl.louvain.h5ad" />
327 <conditional name="method">
328 <param name="method" value="tl.rank_features"/>
329 <param name="rank_features_groupby" value="louvain"/>
330 <param name="rank_features_use_raw" value="False"/>
331 <param name="rank_features_groups" value="all"/>
332 <param name="rank_features_reference" value="rest"/>
333 <param name="rank_features_n_features" value="100"/>
334 <param name="rank_features_rankby_abs" value="False"/>
335 <param name="rank_features_key_added" value="rank_features_groups"/>
336 <param name="rank_features_method" value=""/>
337 <param name="rank_features_corr_method" value="benjamini-hochberg"/>
338 </conditional>
339 <section name="advanced_common">
340 <param name="show_log" value="true" />
341 </section>
342 <output name="anndata_out" file="krumsiek11.tl.rank_features.h5ad" ftype="h5ad">
343 <assert_contents>
344 <has_h5_keys keys="obs, uns, obsm, varm, obsp" />
345 </assert_contents>
346 </output>
347 </test>
348 <test expect_num_outputs="2">
349 <!-- test 2: tl.find_genes annotating chrY.h5ad from chrY.gtf; the output is compared by size (sim_size) and must contain the 'var' key -->
350 <param name="adata" value="chrY.h5ad" />
351 <conditional name="method">
352 <param name="method" value="tl.find_genes"/>
353 <param name="find_genes_gtf_file" value="chrY.gtf"/>
354 <param name="find_genes_key_added" value="transcript_annotation"/>
355 <param name="find_genes_upstream" value="2000"/>
356 <param name="find_genes_feature_type" value="transcript"/>
357 <param name="find_genes_annotation" value="HAVANA"/>
358 <param name="find_genes_raw" value="False"/>
359 </conditional>
360 <section name="advanced_common">
361 <param name="show_log" value="true" />
362 </section>
363 <output name="anndata_out" file="chrY_with_transcript_annotation.h5ad" ftype="h5ad" compare="sim_size">
364 <assert_contents>
365 <has_h5_keys keys="var" />
366 </assert_contents>
367 </output>
368 </test>
369 <test expect_num_outputs="2">
370 <!-- test 3: tl.get_n_clusters targeting 3 louvain clusters. NOTE(review): the expected file may have been produced with the default n_cluster; regenerate it if the comparison fails. -->
371 <param name="adata" value="krumsiek11.pp.lazy.tl.louvain.h5ad" />
372 <conditional name="method">
373 <param name="method" value="tl.get_n_clusters"/>
374 <param name="get_n_clusters_n_cluster" value="3"/>
375 <param name="get_n_clusters_method" value="louvain"/>
376 <param name="get_n_clusters_range_min" value="0"/>
377 <param name="get_n_clusters_range_max" value="3"/>
378 <param name="get_n_clusters_max_steps" value="20"/>
379 <!-- key_added is a direct child of the "method" conditional; the tool defines no -->
380 <!-- "get_n_clusters_obs_key" conditional, so the parameter is not nested here. -->
381 <param name="get_n_clusters_key_added" value="None"/>
382 </conditional>
383 <section name="advanced_common">
384 <param name="show_log" value="true" />
385 </section>
386 <output name="anndata_out" file="krumsiek11.tl.get_n_clusters.h5ad" ftype="h5ad"/>
387 </test>
388 <test expect_num_outputs="2">
389 <!-- test 4: tl.kmeans with num_clusters=14, run on krumsiek11.pp.lazy.h5ad (the same file test 0 compares its output against) -->
390 <param name="adata" value="krumsiek11.pp.lazy.h5ad" />
391 <conditional name="method">
392 <param name="method" value="tl.kmeans"/>
393 <param name="kmeans_num_clusters" value="14"/>
394 </conditional>
395 <section name="advanced_common">
396 <param name="show_log" value="true" />
397 </section>
398 <output name="anndata_out" file="krumsiek11.tl.kmeans.h5ad" ftype="h5ad"/>
399 </test>
400 <test expect_num_outputs="2">
401 <!-- test 5: tl.hc with num_clusters=14, run on krumsiek11.pp.lazy.h5ad (the same file test 0 compares its output against) -->
402 <param name="adata" value="krumsiek11.pp.lazy.h5ad" />
403 <conditional name="method">
404 <param name="method" value="tl.hc"/>
405 <param name="hc_num_clusters" value="14"/>
406 </conditional>
407 <section name="advanced_common">
408 <param name="show_log" value="true" />
409 </section>
410 <output name="anndata_out" file="krumsiek11.tl.hc.h5ad" ftype="h5ad"/>
411 </test>
412 </tests>
413 <help><![CDATA[
414
415 Automatically compute PCA coordinates (`pp.lazy`)
416 ========================================================================================
417 This function automatically computes PCA coordinates, loadings and variance decomposition, a neighborhood graph of
418 observations, t-distributed stochastic neighborhood embedding (tSNE) and Uniform Manifold Approximation and Projection (UMAP).
419
420 More details on the `episcanpy documentation
421 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.lazy.html>`__
422
423 Find and add gene annotations (`tl.find_genes`)
424 ========================================================================================
425 This function adds a gene annotation to an AnnData (h5ad) file from an annotation file (.annotation.gtf).
426
427 Automatically obtain target number of clusters (`tl.getNClusters`)
428 ========================================================================================
429 This function will test different settings of louvain to obtain the target number of clusters.
430
431 More details on the `episcanpy documentation
432 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.tl.getNClusters.html>`__
433
434 Perform kmeans clustering (`tl.kmeans`)
435 ========================================================================================
436 This function will perform kmeans clustering using X_pca fits.
437
438 More details on the `episcanpy documentation
439 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.tl.kmeans.html>`__
440
441 Compute hierarchical clustering using X_pca fits (`tl.hc`)
442 ===================================================================
443
444 This function computes hierarchical clustering on X_pca fits, using random_state=2019.
445
446 More details on the `episcanpy documentation
447 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.tl.hc.html>`__
448 ]]></help>
449 <expand macro="citations"/>
450 </tool>