Mercurial > repos > bgruening > sklearn_numeric_clustering
comparison main_macros.xml @ 0:dac8a9712939 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
| author | bgruening |
|---|---|
| date | Mon, 02 May 2016 16:16:42 -0400 |
| parents | |
| children | d938b80a954f |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dac8a9712939 |
|---|---|
| 1 <macros> | |
| 2 <!-- Version token: expands to 0.9 wherever it is referenced (presumably the shared tool version; confirm in the tool files that use it). --> <token name="@VERSION@">0.9</token> | |
| 3 | |
| 4 <!-- Reusable requirements block: declares the eden package at version 0.2.1b and yields so the expanding tool can add further entries. --> <xml name="python_requirements"> | |
| 5 <requirements> | |
| 6 <requirement version="0.2.1b" type="package">eden</requirement> | |
| 7 <yield/> | |
| 8 </requirements> | |
| 9 </xml> | |
| 10 | |
| 11 <!-- Reusable stdio block: exit codes in the range 1 and above are reported as fatal. --> <xml name="macro_stdio"> | |
| 12 <stdio> | |
| 13 <exit_code level="fatal" range="1:" description="Error occurred. Please check Tool Standard Error"/> | |
| 14 </stdio> | |
| 15 </xml> | |
| 16 | |
| 17 <!-- Train-or-load conditional. The token_train, token_data and token_model attributes set the accepted dataset formats (defaults: tabular, tabular, txt); the expanding tool supplies the content of the selected_algorithms conditional through the yield. --> <xml name="train_loadConditional" token_model="txt" token_data="tabular" token_train="tabular"> | |
| 18 <conditional name="selected_tasks"> | |
| 19 <param type="select" name="selected_task" label="Select a Classification Task"> | |
| 20 <option value="load">Load a model and predict</option> | |
| 21 <option selected="true" value="train">Train a model</option> | |
| 22 </param> | |
| 23 <when value="load"> | |
| 24 <param type="data" name="infile_model" format="@MODEL@" label="Models" help="Select a model file."/> | |
| 25 <param type="data" name="infile_data" format="@DATA@" label="Data (tabular)" help="Select the dataset you want to classify."/> | |
| 26 <conditional name="prediction_options"> | |
| 27 <param type="select" name="prediction_option" label="Select the type of prediction"> | |
| 28 <option value="predict">Predict class labels</option> | |
| 29 <option value="advanced">Include advanced options</option> | |
| 30 </param> | |
| 31 <when value="predict"> | |
| 32 </when> | |
| 33 <when value="advanced"> | |
| 34 </when> | |
| 35 </conditional> | |
| 36 </when> | |
| 37 <when value="train"> | |
| 38 <param type="data" name="infile_train" format="@TRAIN@" label="Training samples (tabular)"/> | |
| 39 <conditional name="selected_algorithms"> | |
| 40 <yield/> | |
| 41 </conditional> | |
| 42 </when> | |
| 43 </conditional> | |
| 44 </xml> | |
| 45 | |
| 46 <!-- Wraps whatever the expanding tool yields in a section named options, titled Advanced Options, with expanded set to False. --> <xml name="advanced_section"> | |
| 47 <section title="Advanced Options" name="options" expanded="False"> | |
| 48 <yield/> | |
| 49 </section> | |
| 50 </xml> | |
| 51 | |
| 52 <!-- Tabular dataset input (infile) plus two optional column selectors, start_column and end_column, that both reference infile. --> <xml name="tabular_input"> | |
| 53 <param type="data" name="infile" format="tabular" label="Data file with numeric values"/> | |
| 54 <param type="data_column" name="start_column" optional="True" data_ref="infile" label="Select a subset of data. Start column:"/> | |
| 55 <param type="data_column" name="end_column" optional="True" data_ref="infile" label="End column:"/> | |
| 56 </xml> | |
| 57 | |
| 58 <!-- Optional float parameter tol; the default value (0.0) and the help text can be overridden by the expanding macro call. --> <xml name="tol" token_help_text="Early stopping heuristics based on the relative center changes. Set to default (0.0) to disable this convergence detection." token_default_value="0.0"> | |
| 59 <param type="float" argument="tol" value="@DEFAULT_VALUE@" optional="true" label="Tolerance" help="@HELP_TEXT@"/> | |
| 60 </xml> | |
| 61 | |
| 62 <!-- Optional integer parameter n_clusters; the default of 8 can be overridden by the expanding macro call. --> <xml name="n_clusters" token_default_value="8"> | |
| 63 <param type="integer" argument="n_clusters" value="@DEFAULT_VALUE@" optional="true" label="Number of clusters" help=" "/> | |
| 64 </xml> | |
| 65 | |
| 66 <!-- Optional boolean parameter fit_intercept, checked by default. NOTE(review): falsevalue is spelled boolflase; it is left untouched because whatever consumes the value may match on that literal. Confirm before correcting. --> <xml name="fit_intercept" token_checked="true"> | |
| 67 <param type="boolean" argument="fit_intercept" checked="@CHECKED@" truevalue="booltrue" falsevalue="boolflase" optional="true" label="Estimate the intercept" help="If false, the data is assumed to be already centered."/> | |
| 68 </xml> | |
| 69 | |
| 70 <!-- Optional integer parameter n_iter; default value (5) and help text are overridable by the expanding macro call. --> <xml name="n_iter" token_help_text="The number of passes over the training data (aka epochs). " token_default_value="5"> | |
| 71 <param type="integer" argument="n_iter" value="@DEFAULT_VALUE@" optional="true" label="Number of iterations" help="@HELP_TEXT@"/> | |
| 72 </xml> | |
| 73 | |
| 74 <!-- Optional boolean parameter shuffle; checked state, label and help text are overridable by the expanding macro call. NOTE(review): falsevalue is spelled boolflase; it is left untouched because whatever consumes the value may match on that literal. Confirm before correcting. --> <xml name="shuffle" token_label="Shuffle data after each iteration" token_help_text=" " token_checked="true"> | |
| 75 <param type="boolean" argument="shuffle" checked="@CHECKED@" truevalue="booltrue" falsevalue="boolflase" optional="true" label="@LABEL@" help="@HELP_TEXT@"/> | |
| 76 </xml> | |
| 77 | |
| 78 <!-- Optional integer parameter random_state; the default value is empty and the help text is overridable by the expanding macro call. --> <xml name="random_state" token_help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data. A fixed seed allows reproducible results." token_default_value=""> | |
| 79 <param type="integer" argument="random_state" value="@DEFAULT_VALUE@" optional="true" label="Random seed number" help="@HELP_TEXT@"/> | |
| 80 </xml> | |
| 81 | |
| 82 <!-- Optional boolean parameter warm_start, checked by default; help text is overridable by the expanding macro call. NOTE(review): falsevalue is spelled boolflase; it is left untouched because whatever consumes the value may match on that literal. Confirm before correcting. --> <xml name="warm_start" token_help_text="When set to True, reuse the solution of the previous call to fit as initialization,otherwise, just erase the previous solution." token_checked="true"> | |
| 83 <param type="boolean" argument="warm_start" checked="@CHECKED@" truevalue="booltrue" falsevalue="boolflase" optional="true" label="Perform warm start" help="@HELP_TEXT@"/> | |
| 84 </xml> | |
| 85 | |
| 86 <!-- Optional float parameter C (penalty parameter); default value (1.0) and help text are overridable by the expanding macro call. --> <xml name="C" token_help_text="Penalty parameter C of the error term." token_default_value="1.0"> | |
| 87 <param type="float" argument="C" value="@DEFAULT_VALUE@" optional="true" label="Penalty parameter" help="@HELP_TEXT@"/> | |
| 88 </xml> | |
| 89 | |
| 90 <!--xml name="class_weight" token_default_value="" token_help_text=""> | |
| 91 <param argument="class_weight" type="" optional="true" value="@DEFAULT_VALUE@" label="" help="@HELP_TEXT@"/> | |
| 92 </xml--> | |
| 93 | |
| 94 <!-- Optional float parameter alpha (regularization coefficient); default value (0.0001) and help text are overridable by the expanding macro call. --> <xml name="alpha" token_help_text="Constant that multiplies the regularization term if regularization is used. " token_default_value="0.0001"> | |
| 95 <param type="float" argument="alpha" value="@DEFAULT_VALUE@" optional="true" label="Regularization coefficient" help="@HELP_TEXT@"/> | |
| 96 </xml> | |
| 97 | |
| 98 <!-- Optional integer parameter n_samples; default value (100) and help text are overridable by the expanding macro call. --> <xml name="n_samples" token_help_text="The total number of points equally divided among clusters." token_default_value="100"> | |
| 99 <param type="integer" argument="n_samples" value="@DEFAULT_VALUE@" optional="true" label="Number of samples" help="@HELP_TEXT@"/> | |
| 100 </xml> | |
| 101 | |
| 102 <!-- Optional integer parameter n_features; default value (2) and help text are overridable by the expanding macro call. --> <xml name="n_features" token_help_text="Number of different numerical properties produced for each sample." token_default_value="2"> | |
| 103 <param type="integer" argument="n_features" value="@DEFAULT_VALUE@" optional="true" label="Number of features" help="@HELP_TEXT@"/> | |
| 104 </xml> | |
| 105 | |
| 106 <!-- Optional float parameter noise; default value (0.0) and help text are overridable by the expanding macro call. --> <xml name="noise" token_help_text="Floating point number. " token_default_value="0.0"> | |
| 107 <param type="float" argument="noise" value="@DEFAULT_VALUE@" optional="true" label="Standard deviation of the Gaussian noise added to the data" help="@HELP_TEXT@"/> | |
| 108 </xml> | |
| 109 | |
| 110 <!-- A second definition of the "C" macro used to sit here. It repeated the earlier "C" macro in this file (same parameter, same default, same label) and was removed so that the name is defined exactly once. --> | |
| 111 | |
| 112 | |
| 113 | |
| 114 <!-- Optional integer parameter max_iter; default value (300), label and help text are overridable by the expanding macro call. --> <xml name="max_iter" token_help_text=" " token_label="Maximum number of iterations per single run" token_default_value="300"> | |
| 115 <param type="integer" argument="max_iter" value="@DEFAULT_VALUE@" optional="true" label="@LABEL@" help="@HELP_TEXT@"/> | |
| 116 </xml> | |
| 117 | |
| 118 <!-- Optional integer parameter n_init; the default of 10 can be overridden by the expanding macro call. --> <xml name="n_init" token_default_value="10"> | |
| 119 <param type="integer" argument="n_init" value="@DEFAULT_VALUE@" optional="true" label="Number of runs with different centroid seeds" help=" "/> | |
| 120 </xml> | |
| 121 | |
| 122 <!-- Select parameter init offering the two centroid initialization choices k-means++ and random. --> <xml name="init"> | |
| 123 <param type="select" argument="init" label="Centroid initialization method" help="''k-means++'' selects initial cluster centers that speed up convergence. ''random'' chooses k observations (rows) at random from data as initial centroids."> | |
| 124 <option value="k-means++">k-means++</option> | |
| 125 <option value="random">random</option> | |
| 126 </param> | |
| 127 </xml> | |
| 128 | |
| 129 <!-- Repeat block sparse_inputs holding between 1 and 10 dataset inputs of format txt. --> <xml name="multiple_sparse"> | |
| 130 <repeat title="Inputs" name="sparse_inputs" min="1" max="10"> | |
| 131 <param type="data" name="input" format="txt" label="Sparse matrix file (.mtx, .txt)" help="Specify a sparse matrix file in .txt format."/> | |
| 132 </repeat> | |
| 133 </xml> | |
| 134 | |
| 135 <!-- BibTeX citation for EDeN (Zenodo record 10.5281/zenodo.15094). The entry is closed by exactly one brace; an extra, unbalanced closing brace that followed it was removed so the citation text stays well-formed BibTeX. --> <xml name="eden_citation"> | |
| 136 <citations> | |
| 137 <citation type="bibtex"> | |
| 138 @misc{fabrizio_costa_2015_15094, | |
| 139 author = {Fabrizio Costa and | |
| 140 Björn Grüning and | |
| 141 gigolo}, | |
| 142 title = {EDeN: EDeN - Graph Vectorizer}, | |
| 143 month = feb, | |
| 144 year = 2015, | |
| 145 doi = {10.5281/zenodo.15094}, | |
| 146 url = {http://dx.doi.org/10.5281/zenodo.15094} | |
| 147 } | |
| 148 | |
| 149 </citation> | |
| 150 </citations> | |
| 151 </xml> | |
| 152 | |
| 153 <!-- BibTeX citation for the scikit-learn JMLR article. Every field except the last ends with a comma; the separator that was missing between the year and url fields has been added so the entry parses. --> <xml name="sklearn_citation"> | |
| 154 <citations> | |
| 155 <citation type="bibtex"> | |
| 156 @article{scikit-learn, | |
| 157 title={Scikit-learn: Machine Learning in {P}ython}, | |
| 158 author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. | |
| 159 and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. | |
| 160 and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and | |
| 161 Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.}, | |
| 162 journal={Journal of Machine Learning Research}, | |
| 163 volume={12}, | |
| 164 pages={2825--2830}, | |
| 165 year={2011}, | |
| 166 url = {https://github.com/scikit-learn/scikit-learn} | |
| 167 } | |
| 168 </citation> | |
| 169 </citations> | |
| 170 </xml> | |
| 171 | |
| 172 <!-- BibTeX citation for SciPy. NOTE(review): the entry has an empty citation key (nothing between the opening brace and the first comma); some BibTeX parsers may reject that. Confirm it renders before relying on it. --> <xml name="scipy_citation"> | |
| 173 <citations> | |
| 174 <citation type="bibtex"> | |
| 175 @Misc{, | |
| 176 author = {Eric Jones and Travis Oliphant and Pearu Peterson and others}, | |
| 177 title = {{SciPy}: Open source scientific tools for {Python}}, | |
| 178 year = {2001--}, | |
| 179 url = "http://www.scipy.org/", | |
| 180 note = {[Online; accessed 2016-04-09]} | |
| 181 } | |
| 182 </citation> | |
| 183 </citations> | |
| 184 </xml> | |
| 185 | |
| 186 | |
| 187 <!-- Advanced Options section for nearest-neighbor tools: content yielded by the expanding tool comes first, followed by the weights, algorithm and leaf_size parameters. --> <xml name="nn_advanced_options"> | |
| 188 <section title="Advanced Options" name="options" expanded="False"> | |
| 189 <yield /> | |
| 190 <param type="select" argument="weights" label="Weight function" help="Used in prediction."> | |
| 191 <option selected="true" value="uniform" help="Uniform weights. All points in each neighborhood are weighted equally.">Uniform</option> | |
| 192 <option help="Weight points by the inverse of their distance." value="distance">Distance</option> | |
| 193 </param> | |
| 194 <param type="select" argument="algorithm" label="Neighbor selection algorithm" help=" "> | |
| 195 <option selected="true" value="auto">Auto</option> | |
| 196 <option value="ball_tree" >BallTree</option> | |
| 197 <option value="kd_tree" >KDTree</option> | |
| 198 <option value="brute" >Brute-force</option> | |
| 199 </param> | |
| 200 <param type="integer" argument="leaf_size" value="30" label="Leaf size" help="Used with BallTree and KDTree. Affects the time and memory usage of the constructed tree."/> | |
| 201 <!--param name="metric"--> | |
| 202 <!--param name="p"--> | |
| 203 <!--param name="metric_params"--> | |
| 204 </section> | |
| 205 </xml> | |
| 206 | |
| 207 <!-- Advanced Options section for SVC tools: yielded content first, then kernel, degree, coef0, shrinking, probability, and the shared tol, max_iter and random_state macros with SVC-specific defaults and help. The misspelled word "default" in the degree and coef0 help texts has been corrected. NOTE(review): the boolean parameters keep falsevalue boolflase as found; confirm what consumes that literal before correcting it. --> <xml name="svc_advanced_options"> | |
| 208 <section name="options" title="Advanced Options" expanded="False"> | |
| 209 <yield/> | |
| 210 <param argument="kernel" type="select" optional="true" label="Kernel type" help="Kernel type to be used in the algorithm. If none is given, ‘rbf’ will be used."> | |
| 211 <option value="rbf" selected="true">rbf</option> | |
| 212 <option value="linear">linear</option> | |
| 213 <option value="poly">poly</option> | |
| 214 <option value="sigmoid">sigmoid</option> | |
| 215 <option value="precomputed">precomputed</option> | |
| 216 </param> | |
| 217 <param argument="degree" type="integer" optional="true" value="3" label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. default : 3 "/> | |
| 218 <!--TODO: param argument="gamma" float, optional (default=’auto’) --> | |
| 219 <param argument="coef0" type="float" optional="true" value="0.0" label="Zero coefficient (polynomial and sigmoid kernels only)" help="Independent term in kernel function. default: 0.0 "/> | |
| 220 <param argument="shrinking" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use the shrinking heuristic" help=" "/> | |
| 221 <param argument="probability" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Enable probability estimates. " help="This must be enabled prior to calling fit, and will slow down that method."/> | |
| 222 <!-- param argument="cache_size"--> | |
| 223 <!--expand macro="class_weight"/--> | |
| 224 <expand macro="tol" default_value="0.001" help_text="Tolerance for stopping criterion. "/> | |
| 225 <expand macro="max_iter" default_value="-1" label="Solver maximum number of iterations" help_text="Hard limit on iterations within solver, or -1 for no limit."/> | |
| 226 <!--param argument="decision_function_shape"--> | |
| 227 <expand macro="random_state" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data for probability estimation. A fixed seed allows reproducible results."/> | |
| 228 </section> | |
| 229 </xml> | |
| 230 | |
| 231 <!-- Advanced Options section for spectral clustering: n_clusters, eigen_solver, random_state, n_init, gamma, affinity, n_neighbors, assign_labels, degree and coef0. Corrections: the nearest_neighbors option label and the word "default" in two help texts were misspelled, and coef0 is now typed float (default still 1), matching the float coef0 of the SVC options macro in this file. --> <xml name="spectral_clustering_advanced_options"> | |
| 232 <section name="options" title="Advanced Options" expanded="False"> | |
| 233 <expand macro="n_clusters"/> | |
| 234 <param argument="eigen_solver" type="select" value="" label="Eigen solver" help="The eigenvalue decomposition strategy to use."> | |
| 235 <option value="arpack" selected="true">arpack</option> | |
| 236 <option value="lobpcg">lobpcg</option> | |
| 237 <option value="amg">amg</option> | |
| 238 <!--None--> | |
| 239 </param> | |
| 240 <expand macro="random_state"/> | |
| 241 <expand macro="n_init"/> | |
| 242 <param argument="gamma" type="float" optional="true" value="1.0" label="Kernel scaling factor" help="Scaling factor of RBF, polynomial, exponential chi^2 and sigmoid affinity kernel. Ignored for affinity=''nearest_neighbors''."/> | |
| 243 <param argument="affinity" type="select" label="Affinity" help="Affinity kernel to use. "> | |
| 244 <option value="rbf" selected="true">RBF</option> | |
| 245 <option value="precomputed">precomputed</option> | |
| 246 <option value="nearest_neighbors">Nearest neighbors</option> | |
| 247 </param> | |
| 248 <param argument="n_neighbors" type="integer" optional="true" value="10" label="Number of neighbors" help="Number of neighbors to use when constructing the affinity matrix using the nearest neighbors method. Ignored for affinity=''rbf''"/> | |
| 249 <!--param argument="eigen_tol"--> | |
| 250 <param argument="assign_labels" type="select" label="Assign labels" help="The strategy to use to assign labels in the embedding space."> | |
| 251 <option value="kmeans" selected="true">kmeans</option> | |
| 252 <option value="discretize">discretize</option> | |
| 253 </param> | |
| 254 <param argument="degree" type="integer" optional="true" value="3" label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. default : 3 "/> | |
| 255 <param argument="coef0" type="float" optional="true" value="1" label="Zero coefficient (polynomial and sigmoid kernels only)" help="Ignored by other kernels. default : 1 "/> | |
| 256 <!--param argument="kernel_params"--> | |
| 257 </section> | |
| 258 </xml> | |
| 259 | |
| 260 <!-- Advanced Options section for mini-batch k-means: shared n_clusters, init, n_init (default 3), max_iter (default 100), tol and random_state macros, plus batch_size, max_no_improvement, init_size and reassignment_ratio. Help-text corrections: "doe not" now reads "does not", and a stray space before a period in the init_size help was removed. --> <xml name="minibatch_kmeans_advanced_options"> | |
| 261 <section name="options" title="Advanced Options" expanded="False"> | |
| 262 <expand macro="n_clusters"/> | |
| 263 <expand macro="init"/> | |
| 264 <expand macro="n_init" default_value="3"/> | |
| 265 <expand macro="max_iter" default_value="100"/> | |
| 266 <expand macro="tol" help_text="Early stopping heuristics based on normalized center change. To disable set to 0.0 ."/> | |
| 267 <expand macro="random_state"/> | |
| 268 <param argument="batch_size" type="integer" optional="true" value="100" label="Batch size" help="Size of the mini batches."/> | |
| 269 <!--param argument="compute_labels"--> | |
| 270 <param argument="max_no_improvement" type="integer" optional="true" value="10" label="Maximum number of improvement attempts" help=" | |
| 271 Convergence detection based on inertia (the consecutive number of mini batches that does not yield an improvement on the smoothed inertia). | |
| 272 To disable, set max_no_improvement to None. "/> | |
| 273 <param argument="init_size" type="integer" optional="true" value="" label="Number of random initialization samples" help="Number of samples to randomly sample for speeding up the initialization. ( default: 3 * batch_size )"/> | |
| 274 <param argument="reassignment_ratio" type="float" optional="true" value="0.01" label="Re-assignment ratio" help="Controls the fraction of the maximum number of counts for a center to be reassigned. Higher values yield better clustering results."/> | |
| 275 </section> | |
| 276 </xml> | |
| 277 | |
| 278 <!-- Advanced Options section for k-means: shared n_clusters, init, n_init, max_iter, tol (default 0.0001) and random_state macros, plus the copy_x boolean. The copy_x help text previously began with the misspelling "Mofifying"; it now reads "Modifying". NOTE(review): copy_x keeps falsevalue boolflase as found; confirm what consumes that literal before correcting it. --> <xml name="kmeans_advanced_options"> | |
| 279 <section name="options" title="Advanced Options" expanded="False"> | |
| 280 <expand macro="n_clusters"/> | |
| 281 <expand macro="init"/> | |
| 282 <expand macro="n_init"/> | |
| 283 <expand macro="max_iter"/> | |
| 284 <expand macro="tol" default_value="0.0001" help_text="Relative tolerance with regards to inertia to declare convergence."/> | |
| 285 <!--param argument="precompute_distances"/--> | |
| 286 <expand macro="random_state"/> | |
| 287 <param argument="copy_x" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for precomputing distances" help="Modifying the original data introduces small numerical differences caused by subtracting and then adding the data mean."/> | |
| 288 </section> | |
| 289 </xml> | |
| 290 | |
| 291 <!-- Advanced Options section for Birch: threshold (float, 0.5), branching_factor (integer, 50) and the shared n_clusters macro with its default overridden to 3. --> <xml name="birch_advanced_options"> | |
| 292 <section title="Advanced Options" name="options" expanded="False"> | |
| 293 <param type="float" argument="threshold" value="0.5" optional="true" label="Subcluster radius threshold" help="The radius of the subcluster obtained by merging a new sample; the closest subcluster should be less than the threshold to avoid a new subcluster."/> | |
| 294 <param type="integer" argument="branching_factor" value="50" optional="true" label="Maximum number of subclusters per branch" help="Maximum number of CF subclusters in each node."/> | |
| 295 <expand default_value="3" macro="n_clusters"/> | |
| 296 <!--param argument="compute_labels"/--> | |
| 297 </section> | |
| 298 </xml> | |
| 299 | |
| 300 <!-- Advanced Options section for DBSCAN: eps, min_samples, metric, algorithm and leaf_size. The leaf_size help text had two misspellings ("efficieny", "constrution"); both are corrected. --> <xml name="dbscan_advanced_options"> | |
| 301 <section name="options" title="Advanced Options" expanded="False"> | |
| 302 <param argument="eps" type="float" optional="true" value="0.5" label="Maximum neighborhood distance" help="The maximum distance between two samples for them to be considered as in the same neighborhood."/> | |
| 303 <param argument="min_samples" type="integer" optional="true" value="5" label="Minimal core point density" help="The number of samples (or total weight) in a neighborhood for a point (including the point itself) to be considered as a core point."/> | |
| 304 <param argument="metric" type="text" optional="true" value="euclidean" label="Metric" help="The metric to use when calculating distance between instances in a feature array."/> | |
| 305 <param argument="algorithm" type="select" label="Pointwise distance computation algorithm" help="The algorithm to be used by the NearestNeighbors module to compute pointwise distances and find nearest neighbors."> | |
| 306 <option value="auto" selected="true">auto</option> | |
| 307 <option value="ball_tree">ball_tree</option> | |
| 308 <option value="kd_tree">kd_tree</option> | |
| 309 <option value="brute">brute</option> | |
| 310 </param> | |
| 311 <param argument="leaf_size" type="integer" optional="true" value="30" label="Leaf size" help="Leaf size passed to BallTree or cKDTree. Memory and time efficiency factor in tree construction and querying."/> | |
| 312 </section> | |
| 313 </xml> | |
| 314 | |
| 315 <!-- Algorithm chooser: the selected_algorithm select (KMeans preselected) switches between the per-algorithm Advanced Options macros. The when cases are listed in the same order as the options. --> <xml name="clustering_algorithms_options"> | |
| 316 <conditional name="algorithm_options"> | |
| 317 <param type="select" name="selected_algorithm" label="Clustering Algorithm"> | |
| 318 <option selected="true" value="KMeans">KMeans</option> | |
| 319 <option value="SpectralClustering" >Spectral Clustering</option> | |
| 320 <option value="MiniBatchKMeans" >Mini Batch KMeans</option> | |
| 321 <option value="DBSCAN" >DBSCAN</option> | |
| 322 <option value="Birch" >Birch</option> | |
| 323 </param> | |
| 324 <when value="KMeans"> | |
| 325 <expand macro="kmeans_advanced_options"/> | |
| 326 </when> | |
| 327 <when value="SpectralClustering"> | |
| 328 <expand macro="spectral_clustering_advanced_options"/> | |
| 329 </when> | |
| 330 <when value="MiniBatchKMeans"> | |
| 331 <expand macro="minibatch_kmeans_advanced_options"/> | |
| 332 </when> | |
| 333 <when value="DBSCAN"> | |
| 334 <expand macro="dbscan_advanced_options"/> | |
| 335 </when> | |
| 336 <when value="Birch"> | |
| 337 <expand macro="birch_advanced_options"/> | |
| 338 </when> | |
| 339 </conditional> | |
| 340 </xml> | |
| 341 </macros> |
