Mercurial > repos > ecology > claraguess
comparison claraguess.xml @ 0:52d4151e00d8 draft default tip
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
author | ecology |
---|---|
date | Wed, 28 May 2025 10:12:06 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:52d4151e00d8 |
---|---|
1 <tool id="ClaraGuess" name="Clara Estimate and Clustering" version="0.1.2" profile="23.2"> | |
2 <description>Environmental clustering using CLARA and BRT predictions</description> | |
3 <!-- Package requirements with pinned versions: r-base plus the r-cluster, r-dplyr and r-tidyverse packages. --> | |
4 <requirements> | |
5 <requirement type="package" version="4.3.3">r-base</requirement> | |
6 <requirement type="package" version="2.1.8.1">r-cluster</requirement> | |
7 <requirement type="package" version="1.1.4">r-dplyr</requirement> | |
8 <requirement type="package" version="2.0.0">r-tidyverse</requirement> | |
9 </requirements> | |
10 <!-- Runs claraguess.R from the tool directory with positional arguments, in this order: enviro, preds, taxas, type, k, metric, samples, then the output paths data_cluster, silhouette_plot, sih_scores (replaced by the literal 'NA' unless type is "auto") and clustered_taxas_env. A non-zero exit code marks the job as failed. --> | |
11 <command detect_errors="exit_code"><![CDATA[ | |
12 Rscript '$__tool_directory__/claraguess.R' | |
13 '$enviro' | |
14 '$preds' | |
15 '$taxas' | |
16 '$type' | |
17 '$k' | |
18 '$metric' | |
19 '$samples' | |
20 '$data_cluster' | |
21 '$silhouette_plot' | |
22 #if str($type) == "auto": | |
23 '$sih_scores' | |
24 #else: | |
25 'NA' | |
26 #end if | |
27 '$clustered_taxas_env' | |
28 ]]></command> | |
29 <!-- User-facing parameters: three input datasets, the clustering mode ("type") with its k value, the dissimilarity metric and the CLARA sample count. The command block passes them positionally to claraguess.R. --> | |
30 <inputs> | |
31 <param name="enviro" type="data" format="tabular" label="Environmental data (tabular)"/> | |
32 <param name="preds" type="data" format="tabular" multiple="true" label="BRT prediction files (collection of tabular)"/> | |
33 <param name="taxas" type="data" format="txt" label="List of taxa (from TaxaSeeker)"/> | |
34 | |
35 <param name="type" type="select" label="k is ..."> | |
36 <option value="fixed">the number of clusters (fixed)</option> | |
37 <option value="auto">the maximum number of clusters (automatic)</option> | |
38 </param> | |
39 | |
40 <param name="k" type="integer" optional="true" label="Value of k" help="The number of clusters in fixed mode, or the maximum number of clusters in automatic mode."/> | |
41 | |
42 <param name="metric" type="select" label="Dissimilarity metric"> | |
43 <option value="manhattan">Manhattan</option> | |
44 <option value="jaccard">Jaccard</option> | |
45 <option value="euclidean" selected="true">Euclidean</option> | |
46 </param> | |
47 | |
48 <param name="samples" type="integer" value="1000" min="1" label="Number of samples for CLARA"/> | |
49 </inputs> | |
50 <!-- Output datasets. sih_scores is only produced when the "type" parameter is "auto" (see its filter); the other three outputs are always declared. --> | |
51 <outputs> | |
52 <data name="data_cluster" from_work_dir="data_cluster.tabular" format="tabular" label="Cluster assignments (lat, long, cluster)"/> | |
53 <data name="silhouette_plot" from_work_dir="silhouette_plot.png" format="png" label="Silhouette Index Plot"/> | |
54 <data name="sih_scores" from_work_dir="sih_scores.png" format="png" label="Silhouette Plot"> | |
55 <filter>type == "auto"</filter> | |
56 </data> | |
57 <data name="clustered_taxas_env" from_work_dir="clustered_taxas_env.tabular" format="tabular" label="Environment + Clustered Data"/> | |
58 </outputs> | |
59 <!-- Two test cases sharing the same inputs: "fixed" mode expects 3 outputs, "auto" mode expects 4 (it adds sih_scores). Tabular outputs are checked for their header line; PNG outputs are checked by file size only. --> | |
60 <tests> | |
61 <test expect_num_outputs="3"> | |
62 <param name="enviro" value="enviro.tabular"/> | |
63 <param name="preds" value="preds.tabular"/> | |
64 <param name="taxas" value="taxas.tabular"/> | |
65 <param name="type" value="fixed"/> | |
66 <param name="k" value="3"/> | |
67 <param name="metric" value="manhattan"/> | |
68 <param name="samples" value="10"/> | |
69 | |
70 <output name="data_cluster"> | |
71 <assert_contents> | |
72 <has_line_matching expression="^lat\tlong\tcluster$"/> | |
73 <has_n_columns n="3"/> | |
74 </assert_contents> | |
75 </output> | |
76 | |
77 <output name="silhouette_plot"> | |
78 <assert_contents> | |
79 <has_size value="8400" delta="600"/> | |
80 </assert_contents> | |
81 </output> | |
82 | |
83 <output name="clustered_taxas_env"> | |
84 <assert_contents> | |
85 <has_line_matching expression="^lat\tlong\tcluster.*$"/> | |
86 </assert_contents> | |
87 </output> | |
88 </test> | |
89 | |
90 <test expect_num_outputs="4"> | |
91 <param name="enviro" value="enviro.tabular"/> | |
92 <param name="preds" value="preds.tabular"/> | |
93 <param name="taxas" value="taxas.tabular"/> | |
94 <param name="type" value="auto"/> | |
95 <param name="k" value="3"/> | |
96 <param name="metric" value="manhattan"/> | |
97 <param name="samples" value="10"/> | |
98 | |
99 <output name="data_cluster"> | |
100 <assert_contents> | |
101 <has_line_matching expression="^lat\tlong\tcluster$"/> | |
102 <has_n_columns n="3"/> | |
103 </assert_contents> | |
104 </output> | |
105 | |
106 <output name="silhouette_plot"> | |
107 <assert_contents> | |
108 <has_size value="8400" delta="600"/> | |
109 </assert_contents> | |
110 </output> | |
111 | |
112 <output name="sih_scores"> | |
113 <assert_contents> | |
114 <has_size value="6918" delta="600"/> | |
115 </assert_contents> | |
116 </output> | |
117 | |
118 <output name="clustered_taxas_env"> | |
119 <assert_contents> | |
120 <has_line_matching expression="^lat\tlong\tcluster.*$"/> | |
121 </assert_contents> | |
122 </output> | |
123 </test> | |
124 </tests> | |
125 | |
126 <help><![CDATA[ | |
127 ================== | |
128 **What it does?** | |
129 ================== | |
130 | |
131 This tool applies the CLARA clustering method to identify environmental clusters based on: | |
132 - BRT model predictions (a collection of tabular files), | |
133 - environmental variables (tabular), | |
134 - a list of taxa (tabular, from TaxaSeeker). | |
135 The tool enables the determination of the optimal number of clusters for partition-based clustering (if automatic mode is selected), along with generating files used in the subsequent ecoregionalization workflow. | |
136 | |
137 =================== | |
138 **How to use it?** | |
139 =================== | |
140 | |
141 **Parameters:** | |
142 | |
143 - **k is ...** (clustering type): Choose whether k is the number of clusters (fixed) or the maximum number of clusters (automatic). | |
144 - **Value of k**: The number of clusters in fixed mode, or the maximum number of clusters in automatic mode. | |
145 - **Distance metric**: dissimilarity metric / distance used in clustering (Manhattan, Jaccard, or Euclidean). | |
146 - **Samples**: Number of samples drawn for CLARA clustering. | |
147 | |
148 **Outputs:** | |
149 | |
150 - A tabular file containing cluster assignments for each geographic point (columns: lat, long, cluster). | |
151 - Separate datasets for: | |
152 - A silhouette plot (PNG, only when the automatic mode is selected), | |
153 - A silhouette index plot (PNG), | |
154 - A tabular file with original environmental variables and predicted cluster number. | |
155 | |
156 This tool is useful for ecological modeling and spatial analysis, particularly in marine or terrestrial biogeography contexts. | |
157 | |
158 **Example of the environmental file:** | |
159 | |
160 +------+------+---------+------+--------------+-----+ | |
161 | long | lat | Carbo | Grav | Maxbearing | ... | | |
162 +------+------+---------+------+--------------+-----+ | |
163 |139.22|-65.57| 0.88 |28.59 | 3.67 | ... | | |
164 +------+------+---------+------+--------------+-----+ | |
165 |139.22|-65.57| 0.88 |28.61 | 3.64 | ... | | |
166 +------+------+---------+------+--------------+-----+ | |
167 | ... | ... | ... | ... | ... | ... | | |
168 +------+------+---------+------+--------------+-----+ | |
169 | |
170 **Example of the BRT prediction file:** | |
171 | |
172 +-----------+----------+-----------------------+-------------+ | |
173 | lat | long | Prediction.index | spe | | |
174 +-----------+----------+-----------------------+-------------+ | |
175 | -65.57 | 139.22 | 0.122438487221909 | Acarnidae | | |
176 +-----------+----------+-----------------------+-------------+ | |
177 | -65.57 | 139.32 | 0.119154535627801 | Acarnidae | | |
178 +-----------+----------+-----------------------+-------------+ | |
179 | ... | ... | ... | ... | | |
180 +-----------+----------+-----------------------+-------------+ | |
181 | |
182 ]]></help> | |
183 <!-- DOI citations for the CRAN packages dplyr, cluster and tidyverse. --> | |
184 <citations> | |
185 <citation type="doi">10.32614/CRAN.package.dplyr</citation> | |
186 <citation type="doi">10.32614/CRAN.package.cluster</citation> | |
187 <citation type="doi">10.32614/CRAN.package.tidyverse</citation> | |
188 </citations> | |
189 </tool> |