Mercurial > repos > bgruening > numeric_clustering
diff numeric_clustering.xml @ 2:1d465f2ebfad draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/numeric_clustering commit adf077b912ddebd97b07b947b855cdd2862ed8ef
author | bgruening |
---|---|
date | Fri, 01 Jan 2016 12:58:17 -0500 |
parents | d645cdee08ed |
children | 6bfbaf81b8f4 |
line wrap: on
line diff
--- a/numeric_clustering.xml Fri Jan 01 10:42:52 2016 -0500 +++ b/numeric_clustering.xml Fri Jan 01 12:58:17 2016 -0500 @@ -8,9 +8,9 @@ </stdio> <macros> <token name="@VERSION@">0.9</token> - <macro name="n_clusters"> - <param name="n_clusters" type="integer" optional="true" value="8" label="Number of clusters" - help="default value is 8 (--n_clusters)"/> + <macro name="n_clusters" token_default_value="8"> + <param name="n_clusters" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of clusters" + help="default value is @DEFAULT_VALUE@ (--n_clusters)"/> </macro> <macro name="n_init"> <param name="n_init" type="integer" optional="true" value="" label="Number of runs with different centroid seeds"/> @@ -62,37 +62,46 @@ #set $json_string = json.dumps( $params ) python "$cluster_script" '$json_string' + && + cat "$cluster_script" >&2 + ]]> </command> <configfiles> <configfile name="cluster_script"> -<![CDATA[#!/usr/bin/env python +<![CDATA[ import sys import json import numpy as np import sklearn.cluster import pandas -data = pandas.DataFrame.from_csv("$infile", sep='\t', header=0, index_col=0, parse_dates=True, encoding=None, tupleize_cols=False ) +data = pandas.read_csv("$infile", sep='\t', header=0, index_col=0, parse_dates=True, encoding=None, tupleize_cols=False ) my_class = getattr(sklearn.cluster, "$algorithm_options.selected_algorithm") cluster_object = my_class() params = json.loads( sys.argv[1] ) cluster_object.set_params(**params) -if $end_column >= $start_column: +#if $end_column and $start_column: + +if $end_column >= $start_column: data_matrix = data.values[:, $start_column-1:$end_column] else: data_matrix = data.values + +#else: +data_matrix = data.values +#end if prediction = cluster_object.fit_predict( data_matrix ) -data['cluster_label'] = prediction -data.to_csv(path_or_buf = "$outfile",sep="\t") +data[len(data.columns)] = prediction +data.to_csv(path_or_buf = "$outfile", sep="\t") ]]> </configfile> </configfiles> <inputs> - <param name="infile" type="data" format="tabular" label="Data file with numeric values"/> - <param name="start_column" label="Clustering column from" type="data_column" data_ref="infile" optional="True" /> - <param name="end_column" label="to" type="data_column" data_ref="infile" optional="True" /> + <param name="infile" type="data" format="tabular" label="Data file with numeric values" /> + <param name="start_column" type="data_column" data_ref="infile" optional="True" label="Clustering column from" /> + <param name="end_column" type="data_column" data_ref="infile" optional="True" label="to" /> <conditional name="algorithm_options"> <param name="selected_algorithm" type="select" label="Clustering Algorithm"> <option value="KMeans">KMeans</option> @@ -105,7 +114,7 @@ <option value="MiniBatchKMeans">Mini Batch KMeans</option> </param> <when value="KMeans"> - <expand macro="n_clusters"/> + <expand macro="n_clusters" default_label="8"/> <expand macro="init"/> <expand macro="n_init"/> <expand macro="max_iter"/> @@ -129,7 +138,7 @@ <when value="Birch"> <param name="threshold" type="float" optional="true" value="0.5" label="Subcluster radius threshold"/> <param name="branching_factor" type="integer" optional="true" value="50" label="Maximum number of subclusters per branch"/> - <expand macro="n_clusters"/> <!-- default to 3--> + <expand macro="n_clusters" default_label="3" /> <!-- default to 3--> <!--param name="compute_labels" type="boolean" optional="true" truevalue="true" falsevale="false" label="Compute labels for each fit"/--> </when> <when value="AffinityPropagation"> @@ -148,7 +157,7 @@ <param name="cluster_all" type="boolean" optional="true" truevalue="true" falsevale="false" label="Cluster all"/> </when> <when value="AgglomerativeClustering"> - <expand macro="n_clusters"/> <!-- deafault 2--> + <expand macro="n_clusters" default_label="2" /> <!-- deafault 2--> <expand macro="affinity"/> <!--default = euclidean--> <!--param name="memory" type="callable" optional="true" value="Memory(cachedir=None)" label="Caching path"/--> <!--param name="connectivity" type="list array-like or callable" optional="true" value="None" label="Connectivity matrix"/--> @@ -162,7 +171,7 @@ <!--param name="pooling_func" type="callable" optional="np.mean" value="None" label=""/--> </when> <when value="SpectralClustering"> - <expand macro="n_clusters"/> + <expand macro="n_clusters" default_label="8" /> <param name="eigen_solver" type="select" value="arpack" label="Eigenvalue decomposition strategy"> <option value="arpack">arpack</option> <option value="lobpcg">lobpcg</option> @@ -184,7 +193,7 @@ <!--param name="kernel_params" type="dict" optional="true" value="None" label=""/--> </when> <when value="MiniBatchKMeans"> - <expand macro="n_clusters"/> + <expand macro="n_clusters" default_label="8"/> <expand macro="init"/> <expand macro="n_init"/> <!-- default to 3--> <expand macro="max_iter"/> <!--default to 100--> @@ -205,6 +214,8 @@ <test> <param name="infile" value="numeric_values.tabular" ftype="tabular"/> <param name="selected_algorithm" value="KMeans"/> + <param name="start_column" value="2" /> + <param name="end_column" value="4" /> <param name="n_clusters" value="4" /> <param name="init" value="k-means++" /> <param name="random_state" value="100"/> @@ -213,7 +224,9 @@ <test> <param name="infile" value="numeric_values.tabular" ftype="tabular"/> <param name="selected_algorithm" value="KMeans"/> - <param name="n_clusters" value="6" /> + <param name="start_column" value="2" /> + <param name="end_column" value="4" /> + <param name="n_clusters" value="4" /> <param name="init" value="random" /> <param name="random_state" value="100"/> <output name="outfile" file="cluster_result02.txt"/> @@ -221,6 +234,8 @@ <test> <param name="infile" value="numeric_values.tabular" ftype="tabular"/> <param name="selected_algorithm" value="DBSCAN"/> + <param name="start_column" value="2" /> + <param name="end_column" value="4" /> <param name="algorithm" value="kd_tree"/> <param name="leaf_size" value="10"/> <param name="eps" value="1.0"/> @@ -229,19 +244,25 @@ <test> <param name="infile" value="numeric_values.tabular" ftype="tabular"/> <param name="selected_algorithm" value="Birch"/> - <param name="n_clusters" value="5"/> + <param name="start_column" value="2" /> + <param name="end_column" value="4" /> + <param name="n_clusters" value="4"/> <param name="threshold" value="0.008"/> <output name="outfile" file="cluster_result04.txt"/> </test> <test> <param name="infile" value="numeric_values.tabular" ftype="tabular"/> <param name="selected_algorithm" value="Birch"/> + <param name="start_column" value="2" /> + <param name="end_column" value="4" /> <param name="branching_factor" value="20"/> <output name="outfile" file="cluster_result05.txt"/> </test> <test> <param name="infile" value="numeric_values.tabular" ftype="tabular"/> <param name="selected_algorithm" value="AffinityPropagation"/> + <param name="start_column" value="2" /> + <param name="end_column" value="4" /> <param name="affinity" value="euclidean"/> <param name="copy" value="false"/> <output name="outfile" file="cluster_result06.txt"/> @@ -249,24 +270,32 @@ <test> <param name="infile" value="numeric_values.tabular" ftype="tabular"/> <param name="selected_algorithm" value="AffinityPropagation"/> + <param name="start_column" value="2" /> + <param name="end_column" value="4" /> <param name="damping" value="0.8"/> <output name="outfile" file="cluster_result07.txt"/> </test> <test> <param name="infile" value="numeric_values.tabular" ftype="tabular"/> <param name="selected_algorithm" value="MeanShift"/> + <param name="start_column" value="2" /> + <param name="end_column" value="4" /> <param name="min_bin_freq" value="3"/> <output name="outfile" file="cluster_result08.txt"/> </test> <test> <param name="infile" value="numeric_values.tabular" ftype="tabular"/> <param name="selected_algorithm" value="MeanShift"/> + <param name="start_column" value="2" /> + <param name="end_column" value="4" /> <param name="cluster_all" value="False"/> <output name="outfile" file="cluster_result09.txt"/> </test> <test> <param name="infile" value="numeric_values.tabular" ftype="tabular"/> <param name="selected_algorithm" value="AgglomerativeClustering"/> + <param name="start_column" value="2" /> + <param name="end_column" value="4" /> <param name="affinity" value="euclidean"/> <param name="linkage" value="average"/> <output name="outfile" file="cluster_result10.txt"/> @@ -274,16 +303,20 @@ <test> <param name="infile" value="numeric_values.tabular" ftype="tabular"/> <param name="selected_algorithm" value="AgglomerativeClustering"/> + <param name="start_column" value="2" /> + <param name="end_column" value="4" /> <param name="linkage" value="complete"/> - <param name="n_clusters" value="5"/> + <param name="n_clusters" value="4"/> <output name="outfile" file="cluster_result11.txt"/> </test> <test> <param name="infile" value="numeric_values.tabular" ftype="tabular"/> <param name="selected_algorithm" value="SpectralClustering"/> + <param name="start_column" value="2" /> + <param name="end_column" value="4" /> <param name="eigen_solver" value="arpack"/> <param name="n_neighbors" value="12"/> - <param name="n_clusters" value="7"/> + <param name="n_clusters" value="4"/> <param name="assign_labels" value="discretize"/> <param name="random_state" value="100"/> <output name="outfile" file="cluster_result12.txt"/> @@ -291,6 +324,8 @@ <test> <param name="infile" value="numeric_values.tabular" ftype="tabular"/> <param name="selected_algorithm" value="SpectralClustering"/> + <param name="start_column" value="2" /> + <param name="end_column" value="4" /> <param name="assign_labels" value="discretize"/> <param name="random_state" value="100"/> <param name="degree" value="2"/> @@ -299,6 +334,8 @@ <test> <param name="infile" value="numeric_values.tabular" ftype="tabular"/> <param name="selected_algorithm" value="MiniBatchKMeans"/> + <param name="start_column" value="2" /> + <param name="end_column" value="4" /> <param name="tol" value="0.5"/> <param name="random_state" value="100"/> <output name="outfile" file="cluster_result14.txt"/> @@ -307,8 +344,10 @@ <param name="infile" value="numeric_values.tabular" ftype="tabular"/> <param name="selected_algorithm" value="MiniBatchKMeans"/> <param name="n_init" value="5"/> + <param name="start_column" value="2" /> + <param name="end_column" value="4" /> <param name="batch_size" value="10"/> - <param name="n_clusters" value="3"/> + <param name="n_clusters" value="4"/> <param name="random_state" value="100"/> <param name="reassignment_ratio" value="1.0"/> <output name="outfile" file="cluster_result15.txt"/> @@ -316,10 +355,9 @@ <test> <param name="infile" value="numeric_values.tabular" ftype="tabular"/> <param name="selected_algorithm" value="KMeans"/> - <param name="start_column" value="3" /> - <param name="end_column" value="4" /> - <param name="n_clusters" value="6" /> - <param name="init" value="random" /> + <param name="start_column" value="1" /> + <param name="end_column" value="1" /> + <param name="n_clusters" value="4" /> <param name="random_state" value="100"/> <output name="outfile" file="cluster_result16.txt"/> </test>