Mercurial > repos > bgruening > sklearn_numeric_clustering
comparison numeric_clustering.xml @ 46:0e4066f5751d draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
| author | bgruening |
|---|---|
| date | Wed, 09 Aug 2023 11:47:51 +0000 |
| parents | 006e27f0a7ef |
| children |
Comparison legend: equal, deleted, inserted, replaced
| 45:c2d7b79faaec | 46:0e4066f5751d |
|---|---|
| 1 <tool id="sklearn_numeric_clustering" name="Numeric Clustering" version="@VERSION@" profile="20.05"> | 1 <tool id="sklearn_numeric_clustering" name="Numeric Clustering" version="@VERSION@" profile="@PROFILE@"> |
| 2 <description></description> | 2 <description></description> |
| 3 <macros> | 3 <macros> |
| 4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
| 5 </macros> | 5 </macros> |
| 6 <expand macro="python_requirements" /> | 6 <expand macro="python_requirements" /> |
| 40 | 40 |
| 41 cluster_object.set_params(**options) | 41 cluster_object.set_params(**options) |
| 42 if 'n_jobs' in cluster_object.get_params(): | 42 if 'n_jobs' in cluster_object.get_params(): |
| 43 cluster_object.set_params( n_jobs=N_JOBS ) | 43 cluster_object.set_params( n_jobs=N_JOBS ) |
| 44 | 44 |
| | 45 header = None |
| 45 #if $input_types.selected_input_type == "sparse": | 46 #if $input_types.selected_input_type == "sparse": |
| 46 data_matrix = mmread("$infile") | 47 data_matrix = mmread("$infile") |
| 47 #else: | 48 #else: |
| 48 data = pandas.read_csv("$infile", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None) | 49 data = pandas.read_csv("$infile", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None) |
| | 50 |
| 49 header = 'infer' if params["input_types"]["header"] else None | 51 header = 'infer' if params["input_types"]["header"] else None |
| 50 column_option = params["input_types"]["column_selector_options"]["selected_column_selector_option"] | 52 column_option = params["input_types"]["column_selector_options"]["selected_column_selector_option"] |
| 51 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: | 53 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: |
| 52 c = params["input_types"]["column_selector_options"]["col"] | 54 c = params["input_types"]["column_selector_options"]["col"] |
| 53 else: | 55 else: |
| 181 <tests> | 183 <tests> |
| 182 <test> | 184 <test> |
| 183 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> | 185 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> |
| 184 <param name="selected_input_type" value="tabular" /> | 186 <param name="selected_input_type" value="tabular" /> |
| 185 <param name="selected_algorithm" value="KMeans" /> | 187 <param name="selected_algorithm" value="KMeans" /> |
| | 188 <param name="header" value="false" /> |
| 186 <param name="col" value="2,3,4" /> | 189 <param name="col" value="2,3,4" /> |
| 187 <param name="n_clusters" value="4" /> | 190 <param name="n_clusters" value="4" /> |
| 188 <param name="init" value="k-means++" /> | 191 <param name="init" value="k-means++" /> |
| 189 <param name="random_state" value="100" /> | 192 <param name="random_state" value="100" /> |
| 190 <output name="outfile" file="cluster_result01.txt" /> | 193 <output name="outfile" file="cluster_result01.txt" /> |
| 191 </test> | 194 </test> |
| 192 <test> | 195 <test> |
| 193 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> | 196 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> |
| 194 <param name="selected_algorithm" value="KMeans" /> | 197 <param name="selected_algorithm" value="KMeans" /> |
| 195 <param name="selected_input_type" value="tabular" /> | 198 <param name="selected_input_type" value="tabular" /> |
| | 199 <param name="header" value="false" /> |
| 196 <param name="col" value="2,3,4" /> | 200 <param name="col" value="2,3,4" /> |
| 197 <param name="n_clusters" value="4" /> | 201 <param name="n_clusters" value="4" /> |
| 198 <param name="init" value="random" /> | 202 <param name="init" value="random" /> |
| 199 <param name="random_state" value="100" /> | 203 <param name="random_state" value="100" /> |
| 200 <output name="outfile" file="cluster_result02.txt" /> | 204 <output name="outfile" file="cluster_result02.txt" /> |
| 201 </test> | 205 </test> |
| 202 <test> | 206 <test> |
| 203 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> | 207 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> |
| 204 <param name="selected_algorithm" value="DBSCAN" /> | 208 <param name="selected_algorithm" value="DBSCAN" /> |
| 205 <param name="selected_input_type" value="tabular" /> | 209 <param name="selected_input_type" value="tabular" /> |
| | 210 <param name="header" value="false" /> |
| 206 <param name="col" value="2,3,4" /> | 211 <param name="col" value="2,3,4" /> |
| 207 <param name="algorithm" value="kd_tree" /> | 212 <param name="algorithm" value="kd_tree" /> |
| 208 <param name="leaf_size" value="10" /> | 213 <param name="leaf_size" value="10" /> |
| 209 <param name="eps" value="1.0" /> | 214 <param name="eps" value="1.0" /> |
| 210 <output name="outfile" file="cluster_result03.txt" /> | 215 <output name="outfile" file="cluster_result03.txt" /> |
| 211 </test> | 216 </test> |
| 212 <test> | 217 <test> |
| 213 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> | 218 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> |
| 214 <param name="selected_algorithm" value="Birch" /> | 219 <param name="selected_algorithm" value="Birch" /> |
| 215 <param name="selected_input_type" value="tabular" /> | 220 <param name="selected_input_type" value="tabular" /> |
| | 221 <param name="header" value="false" /> |
| 216 <param name="col" value="2,3,4" /> | 222 <param name="col" value="2,3,4" /> |
| 217 <param name="n_clusters" value="4" /> | 223 <param name="n_clusters" value="4" /> |
| 218 <param name="threshold" value="0.008" /> | 224 <param name="threshold" value="0.008" /> |
| 219 <output name="outfile" file="cluster_result04.txt" /> | 225 <output name="outfile" file="cluster_result04.txt" /> |
| 220 </test> | 226 </test> |
| 221 <test> | 227 <test> |
| 222 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> | 228 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> |
| 223 <param name="selected_algorithm" value="Birch" /> | 229 <param name="selected_algorithm" value="Birch" /> |
| 224 <param name="selected_input_type" value="tabular" /> | 230 <param name="selected_input_type" value="tabular" /> |
| | 231 <param name="header" value="false" /> |
| 225 <param name="col" value="2,3,4" /> | 232 <param name="col" value="2,3,4" /> |
| 226 <param name="branching_factor" value="20" /> | 233 <param name="branching_factor" value="20" /> |
| 227 <output name="outfile" file="cluster_result05.txt" /> | 234 <output name="outfile" file="cluster_result05.txt" /> |
| 228 </test> | 235 </test> |
| 229 <test> | 236 <test> |
| 230 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> | 237 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> |
| 231 <param name="selected_algorithm" value="AffinityPropagation" /> | 238 <param name="selected_algorithm" value="AffinityPropagation" /> |
| 232 <param name="selected_input_type" value="tabular" /> | 239 <param name="selected_input_type" value="tabular" /> |
| | 240 <param name="header" value="false" /> |
| 233 <param name="col" value="2,3,4" /> | 241 <param name="col" value="2,3,4" /> |
| 234 <param name="affinity" value="euclidean" /> | 242 <param name="affinity" value="euclidean" /> |
| 235 <param name="copy" value="false" /> | 243 <param name="copy" value="false" /> |
| 236 <output name="outfile" file="cluster_result06.txt" /> | 244 <output name="outfile" file="cluster_result06.txt" /> |
| 237 </test> | 245 </test> |
| 238 <test> | 246 <test> |
| 239 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> | 247 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> |
| 240 <param name="selected_algorithm" value="AffinityPropagation" /> | 248 <param name="selected_algorithm" value="AffinityPropagation" /> |
| 241 <param name="selected_input_type" value="tabular" /> | 249 <param name="selected_input_type" value="tabular" /> |
| | 250 <param name="header" value="false" /> |
| 242 <param name="col" value="2,3,4" /> | 251 <param name="col" value="2,3,4" /> |
| 243 <param name="damping" value="0.8" /> | 252 <param name="damping" value="0.8" /> |
| 244 <output name="outfile" file="cluster_result07.txt" /> | 253 <output name="outfile" file="cluster_result07.txt" /> |
| 245 </test> | 254 </test> |
| 246 <test> | 255 <test> |
| 247 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> | 256 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> |
| 248 <param name="selected_algorithm" value="MeanShift" /> | 257 <param name="selected_algorithm" value="MeanShift" /> |
| 249 <param name="selected_input_type" value="tabular" /> | 258 <param name="selected_input_type" value="tabular" /> |
| | 259 <param name="header" value="false" /> |
| 250 <param name="col" value="2,3,4" /> | 260 <param name="col" value="2,3,4" /> |
| 251 <param name="min_bin_freq" value="3" /> | 261 <param name="min_bin_freq" value="3" /> |
| 252 <output name="outfile" file="cluster_result08.txt" /> | 262 <output name="outfile" file="cluster_result08.txt" /> |
| 253 </test> | 263 </test> |
| 254 <test> | 264 <test> |
| 255 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> | 265 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> |
| 256 <param name="selected_algorithm" value="MeanShift" /> | 266 <param name="selected_algorithm" value="MeanShift" /> |
| 257 <param name="selected_input_type" value="tabular" /> | 267 <param name="selected_input_type" value="tabular" /> |
| | 268 <param name="header" value="false" /> |
| 258 <param name="col" value="2,3,4" /> | 269 <param name="col" value="2,3,4" /> |
| 259 <param name="cluster_all" value="False" /> | 270 <param name="cluster_all" value="False" /> |
| 260 <output name="outfile" file="cluster_result09.txt" /> | 271 <output name="outfile" file="cluster_result09.txt" /> |
| 261 </test> | 272 </test> |
| 262 <test> | 273 <test> |
| 263 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> | 274 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> |
| 264 <param name="selected_algorithm" value="AgglomerativeClustering" /> | 275 <param name="selected_algorithm" value="AgglomerativeClustering" /> |
| 265 <param name="selected_input_type" value="tabular" /> | 276 <param name="selected_input_type" value="tabular" /> |
| | 277 <param name="header" value="false" /> |
| 266 <param name="col" value="2,3,4" /> | 278 <param name="col" value="2,3,4" /> |
| 267 <param name="affinity" value="euclidean" /> | 279 <param name="affinity" value="euclidean" /> |
| 268 <param name="linkage" value="average" /> | 280 <param name="linkage" value="average" /> |
| 269 <param name="n_clusters" value="4" /> | 281 <param name="n_clusters" value="4" /> |
| 270 <output name="outfile" file="cluster_result10.txt" /> | 282 <output name="outfile" file="cluster_result10.txt" /> |
| 271 </test> | 283 </test> |
| 272 <test> | 284 <test> |
| 273 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> | 285 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> |
| 274 <param name="selected_algorithm" value="AgglomerativeClustering" /> | 286 <param name="selected_algorithm" value="AgglomerativeClustering" /> |
| 275 <param name="selected_input_type" value="tabular" /> | 287 <param name="selected_input_type" value="tabular" /> |
| | 288 <param name="header" value="false" /> |
| 276 <param name="col" value="2,3,4" /> | 289 <param name="col" value="2,3,4" /> |
| 277 <param name="linkage" value="complete" /> | 290 <param name="linkage" value="complete" /> |
| 278 <param name="n_clusters" value="4" /> | 291 <param name="n_clusters" value="4" /> |
| 279 <output name="outfile" file="cluster_result11.txt" /> | 292 <output name="outfile" file="cluster_result11.txt" /> |
| 280 </test> | 293 </test> |
| 281 <test> | 294 <test> |
| 282 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> | 295 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> |
| 283 <param name="selected_algorithm" value="SpectralClustering" /> | 296 <param name="selected_algorithm" value="SpectralClustering" /> |
| 284 <param name="selected_input_type" value="tabular" /> | 297 <param name="selected_input_type" value="tabular" /> |
| 285 <param name="col" value="2,3,4" /> | 298 <param name="col" value="2,3,4" /> |
| | 299 <param name="header" value="false" /> |
| 286 <param name="eigen_solver" value="arpack" /> | 300 <param name="eigen_solver" value="arpack" /> |
| 287 <param name="n_neighbors" value="12" /> | 301 <param name="n_neighbors" value="12" /> |
| 288 <param name="n_clusters" value="4" /> | 302 <param name="n_clusters" value="4" /> |
| 289 <param name="assign_labels" value="discretize" /> | 303 <param name="assign_labels" value="discretize" /> |
| 290 <param name="random_state" value="100" /> | 304 <param name="random_state" value="100" /> |
| 293 <test> | 307 <test> |
| 294 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> | 308 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> |
| 295 <param name="selected_algorithm" value="SpectralClustering" /> | 309 <param name="selected_algorithm" value="SpectralClustering" /> |
| 296 <param name="selected_input_type" value="tabular" /> | 310 <param name="selected_input_type" value="tabular" /> |
| 297 <param name="col" value="2,3,4" /> | 311 <param name="col" value="2,3,4" /> |
| | 312 <param name="header" value="false" /> |
| 298 <param name="assign_labels" value="discretize" /> | 313 <param name="assign_labels" value="discretize" /> |
| 299 <param name="random_state" value="100" /> | 314 <param name="random_state" value="100" /> |
| 300 <param name="degree" value="2" /> | 315 <param name="degree" value="2" /> |
| 301 <output name="outfile" file="cluster_result13.txt" compare="sim_size" /> | 316 <output name="outfile" file="cluster_result13.txt" compare="sim_size" /> |
| 302 </test> | 317 </test> |
| 303 <test> | 318 <test> |
| 304 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> | 319 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> |
| 305 <param name="selected_algorithm" value="MiniBatchKMeans" /> | 320 <param name="selected_algorithm" value="MiniBatchKMeans" /> |
| 306 <param name="selected_input_type" value="tabular" /> | 321 <param name="selected_input_type" value="tabular" /> |
| | 322 <param name="header" value="false" /> |
| 307 <param name="col" value="2,3,4" /> | 323 <param name="col" value="2,3,4" /> |
| 308 <param name="tol" value="0.5" /> | 324 <param name="tol" value="0.5" /> |
| 309 <param name="random_state" value="100" /> | 325 <param name="random_state" value="100" /> |
| 310 <output name="outfile" file="cluster_result14.txt" /> | 326 <output name="outfile" file="cluster_result14.txt" /> |
| 311 </test> | 327 </test> |
| 312 <test> | 328 <test> |
| 313 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> | 329 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> |
| 314 <param name="selected_algorithm" value="MiniBatchKMeans" /> | 330 <param name="selected_algorithm" value="MiniBatchKMeans" /> |
| 315 <param name="selected_input_type" value="tabular" /> | 331 <param name="selected_input_type" value="tabular" /> |
| | 332 <param name="header" value="false" /> |
| 316 <param name="n_init" value="5" /> | 333 <param name="n_init" value="5" /> |
| 317 <param name="col" value="2,3,4" /> | 334 <param name="col" value="2,3,4" /> |
| 318 <param name="batch_size" value="10" /> | 335 <param name="batch_size" value="10" /> |
| 319 <param name="n_clusters" value="4" /> | 336 <param name="n_clusters" value="4" /> |
| 320 <param name="random_state" value="100" /> | 337 <param name="random_state" value="100" /> |
| 323 </test> | 340 </test> |
| 324 <test> | 341 <test> |
| 325 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> | 342 <param name="infile" value="numeric_values.tabular" ftype="tabular" /> |
| 326 <param name="selected_algorithm" value="KMeans" /> | 343 <param name="selected_algorithm" value="KMeans" /> |
| 327 <param name="selected_input_type" value="tabular" /> | 344 <param name="selected_input_type" value="tabular" /> |
| | 345 <param name="header" value="false" /> |
| 328 <param name="col" value="1" /> | 346 <param name="col" value="1" /> |
| 329 <param name="n_clusters" value="4" /> | 347 <param name="n_clusters" value="4" /> |
| 330 <param name="random_state" value="100" /> | 348 <param name="random_state" value="100" /> |
| 331 <output name="outfile" file="cluster_result16.txt" /> | 349 <output name="outfile" file="cluster_result16.txt" /> |
| 332 </test> | 350 </test> |
