comparison claraguess.xml @ 0:52d4151e00d8 draft default tip

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
author ecology
date Wed, 28 May 2025 10:12:06 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:52d4151e00d8
1 <tool id="ClaraGuess" name="Clara Estimate and Clustering" version="0.1.2" profile="23.2">
2 <description>Environmental clustering using CLARA and BRT predictions</description>
3
4 <requirements> <!-- Pinned package dependencies; presumably the R runtime and libraries loaded by claraguess.R (script not visible here, confirm). -->
5 <requirement type="package" version="4.3.3">r-base</requirement>
6 <requirement type="package" version="2.1.8.1">r-cluster</requirement>
7 <requirement type="package" version="1.1.4">r-dplyr</requirement>
8 <requirement type="package" version="2.0.0">r-tidyverse</requirement>
9 </requirements>
10
11 <!-- Runs claraguess.R with positional arguments in this order: enviro, preds, taxas, type, k, metric, samples, data_cluster, silhouette_plot, then sih_scores (or the literal 'NA' unless type is "auto"), and finally clustered_taxas_env. The job fails on a non-zero exit code. --> <command detect_errors="exit_code"><![CDATA[
12 Rscript '$__tool_directory__/claraguess.R'
13 '$enviro'
14 '$preds'
15 '$taxas'
16 '$type'
17 '$k'
18 '$metric'
19 '$samples'
20 '$data_cluster'
21 '$silhouette_plot'
22 #if str($type) == "auto":
23 '$sih_scores'
24 #else:
25 'NA'
26 #end if
27 '$clustered_taxas_env'
28 ]]></command>
29
30 <inputs> <!-- Three input datasets followed by the clustering settings; every value is forwarded to claraguess.R by the command section. -->
31 <param name="enviro" type="data" format="tabular" label="Environmental data (tabular)"/>
32 <param name="preds" type="data" format="tabular" multiple="true" label="BRT prediction files (collection of tabular)"/>
33 <param name="taxas" type="data" format="txt" label="List of taxa (from TaxaSeeker)"/>
34 <!-- "type" decides how k is read: as the number of clusters (fixed) or as the maximum number of clusters (auto). -->
35 <param name="type" type="select" label="k is ...">
36 <option value="fixed">the number of clusters (fixed)</option>
37 <option value="auto">the maximum number of clusters (automatic)</option>
38 </param>
39 <!-- k is always passed on the command line, so it must never be empty: it is required and has a default. A silhouette needs at least 2 clusters, hence min="2". -->
40 <param name="k" type="integer" value="2" min="2" label="Value of k" help="Number of clusters in fixed mode, or maximum number of clusters in automatic mode."/>
41
42 <param name="metric" type="select" label="Dissimilarity metric">
43 <option value="manhattan">Manhattan</option>
44 <option value="jaccard">Jaccard</option>
45 <option value="euclidean" selected="true">Euclidean</option>
46 </param>
47 <!-- At least one sample must be drawn. -->
48 <param name="samples" type="integer" value="1000" min="1" label="Number of samples for CLARA"/>
49 </inputs>
50
51 <outputs> <!-- Labels follow the output names: silhouette_plot is the silhouette plot, sih_scores is the silhouette index plot. -->
52 <data name="data_cluster" from_work_dir="data_cluster.tabular" format="tabular" label="Cluster assignments (lat, long, cluster)"/>
53 <data name="silhouette_plot" from_work_dir="silhouette_plot.png" format="png" label="Silhouette Plot"/>
54 <data name="sih_scores" from_work_dir="sih_scores.png" format="png" label="Silhouette Index Plot"> <!-- Only created when type is "auto"; the command passes 'NA' in its place otherwise. -->
55 <filter>type == "auto"</filter>
56 </data>
57 <data name="clustered_taxas_env" from_work_dir="clustered_taxas_env.tabular" format="tabular" label="Environment + Clustered Data"/>
58 </outputs>
59
60 <tests> <!-- One test per value of "type"; both use the same input files, k=3, the manhattan metric and 10 samples. -->
61 <test expect_num_outputs="3"> <!-- type=fixed: sih_scores is filtered out, leaving three outputs. -->
62 <param name="enviro" value="enviro.tabular"/>
63 <param name="preds" value="preds.tabular"/>
64 <param name="taxas" value="taxas.tabular"/>
65 <param name="type" value="fixed"/>
66 <param name="k" value="3"/>
67 <param name="metric" value="manhattan"/>
68 <param name="samples" value="10"/>
69 <!-- The cluster table must have the exact header lat, long, cluster and three columns. -->
70 <output name="data_cluster">
71 <assert_contents>
72 <has_line_matching expression="^lat\tlong\tcluster$"/>
73 <has_n_columns n="3"/>
74 </assert_contents>
75 </output>
76 <!-- The PNG is checked by file size only (8400 with a tolerance of 600). -->
77 <output name="silhouette_plot">
78 <assert_contents>
79 <has_size value="8400" delta="600"/>
80 </assert_contents>
81 </output>
82 <!-- The merged table must start with lat, long, cluster; further columns are allowed. -->
83 <output name="clustered_taxas_env">
84 <assert_contents>
85 <has_line_matching expression="^lat\tlong\tcluster.*$"/>
86 </assert_contents>
87 </output>
88 </test>
89 <!-- Same inputs in automatic mode. -->
90 <test expect_num_outputs="4"> <!-- type=auto: sih_scores is produced as well, so four outputs are expected. -->
91 <param name="enviro" value="enviro.tabular"/>
92 <param name="preds" value="preds.tabular"/>
93 <param name="taxas" value="taxas.tabular"/>
94 <param name="type" value="auto"/>
95 <param name="k" value="3"/>
96 <param name="metric" value="manhattan"/>
97 <param name="samples" value="10"/>
98 <!-- Same header and column-count check as in the fixed-mode test. -->
99 <output name="data_cluster">
100 <assert_contents>
101 <has_line_matching expression="^lat\tlong\tcluster$"/>
102 <has_n_columns n="3"/>
103 </assert_contents>
104 </output>
105 <!-- Size-only check, same expected size as in the fixed-mode test. -->
106 <output name="silhouette_plot">
107 <assert_contents>
108 <has_size value="8400" delta="600"/>
109 </assert_contents>
110 </output>
111 <!-- Output specific to automatic mode, also checked by size only (6918 with a tolerance of 600). -->
112 <output name="sih_scores">
113 <assert_contents>
114 <has_size value="6918" delta="600"/>
115 </assert_contents>
116 </output>
117 <!-- The merged table must start with lat, long, cluster; further columns are allowed. -->
118 <output name="clustered_taxas_env">
119 <assert_contents>
120 <has_line_matching expression="^lat\tlong\tcluster.*$"/>
121 </assert_contents>
122 </output>
123 </test>
124 </tests>
125
126 <help><![CDATA[
127 ==================
128 **What it does?**
129 ==================
130
131 This tool applies the CLARA clustering method to identify environmental clusters based on:

132 - BRT model predictions (a collection of tabular files),
133 - environmental variables (tabular),
134 - a list of taxa (tabular, from TaxaSeeker).

135 In automatic mode, the tool also determines the optimal number of clusters for partition-based clustering. In both modes it generates the files used in the subsequent ecoregionalization workflow.
136
137 ===================
138 **How to use it?**
139 ===================
140
141 **Parameters:**
142
143 - **k is ...**: Choose whether k is the number of clusters (fixed) or the maximum number of clusters (automatic).
144 - **Value of k**: The number of clusters in fixed mode, or the maximum number of clusters in automatic mode.
145 - **Dissimilarity metric**: dissimilarity metric / distance used in clustering (Manhattan, Jaccard, or Euclidean).
146 - **Samples**: Number of samples drawn for CLARA clustering.
147
148 **Outputs:**
149
150 - A tabular file containing cluster assignments for each geographic point (columns: lat, long, cluster).
151 - A collection of:
152 - A silhouette plot (PNG),
153 - A silhouette index plot (PNG),
154 - A tabular file with original environmental variables and predicted cluster number.
155
156 This tool is useful for ecological modeling and spatial analysis, particularly in marine or terrestrial biogeography contexts.
157
158 **Example of the environmental file:**
159
160 +------+------+---------+------+--------------+-----+
161 | long | lat | Carbo | Grav | Maxbearing | ... |
162 +------+------+---------+------+--------------+-----+
163 |139.22|-65.57| 0.88 |28.59 | 3.67 | ... |
164 +------+------+---------+------+--------------+-----+
165 |139.22|-65.57| 0.88 |28.61 | 3.64 | ... |
166 +------+------+---------+------+--------------+-----+
167 | ... | ... | ... | ... | ... | ... |
168 +------+------+---------+------+--------------+-----+
169
170 **Example of the BRT prediction file:**
171
172 +-----------+----------+-----------------------+-------------+
173 | lat | long | Prediction.index | spe |
174 +-----------+----------+-----------------------+-------------+
175 | -65.57 | 139.22 | 0.122438487221909 | Acarnidae |
176 +-----------+----------+-----------------------+-------------+
177 | -65.57 | 139.32 | 0.119154535627801 | Acarnidae |
178 +-----------+----------+-----------------------+-------------+
179 | ... | ... | ... | ... |
180 +-----------+----------+-----------------------+-------------+
181
182 ]]></help>
183
184 <citations> <!-- DOIs of the CRAN packages dplyr, cluster and tidyverse, matching the r-dplyr, r-cluster and r-tidyverse requirements. -->
185 <citation type="doi">10.32614/CRAN.package.dplyr</citation>
186 <citation type="doi">10.32614/CRAN.package.cluster</citation>
187 <citation type="doi">10.32614/CRAN.package.tidyverse</citation>
188 </citations>
189 </tool>