Mercurial > repos > bgruening > sklearn_feature_selection
comparison main_macros.xml @ 0:2bbacfaadb5c draft
planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit 7a31960686122d7e53054fef4996525f04ebd254
| author | bgruening |
|---|---|
| date | Thu, 12 Apr 2018 08:19:35 -0400 |
| parents | |
| children | f017e93ceda7 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:2bbacfaadb5c |
|---|---|
| 1 <macros> | |
| 2 <!-- Token VERSION: shared version string, currently 0.9. --> <token name="@VERSION@">0.9</token> | |
| 3 | |
| 4 <!-- Token holding Python source for read_columns(f, c, **args): reads f with pandas.read_csv (extra keyword args forwarded), parses c as a comma-separated list of 1-based column numbers, shifts them to 0-based positions and returns the values of those columns. --> <token name="@COLUMNS_FUNCTION@"> | |
| 5 def read_columns(f, c, **args): | |
| 6 data = pandas.read_csv(f, **args) | |
| 7 cols = c.split (',') | |
| 8 cols = map(int, cols) | |
| 9 cols = list(map(lambda x: x - 1, cols)) | |
| 10 y = data.iloc[:,cols].values | |
| 11 return y | |
| 12 </token> | |
| 13 | |
| 14 <!-- Package requirements: python 2.7, scikit-learn 0.19.1 and pandas 0.22.0; callers may yield further requirement entries. --> <xml name="python_requirements"> | |
| 15 <requirements> | |
| 16 <requirement version="2.7" type="package">python</requirement> | |
| 17 <requirement version="0.19.1" type="package">scikit-learn</requirement> | |
| 18 <requirement version="0.22.0" type="package">pandas</requirement> | |
| 19 <yield/> | |
| 20 </requirements> | |
| 21 </xml> | |
| 22 | |
| 23 <!-- stdio rule: any exit code of 1 or above is reported as fatal. --> <xml name="macro_stdio"> | |
| 24 <stdio> | |
| 25 <exit_code level="fatal" range="1:" description="Error occurred. Please check Tool Standard Error"/> | |
| 26 </stdio> | |
| 27 </xml> | |
| 28 | |
| 29 | |
| 30 <!--Generic interface--> | |
| 31 <!-- Train-or-load conditional: the load branch takes a model file, a dataset and a prediction-type selector; the train branch takes a training file and yields inside the selected_algorithms conditional. --> <xml name="train_loadConditional" token_model="txt" token_data="tabular" token_train="tabular"> | |
| 32 <conditional name="selected_tasks"> | |
| 33 <param type="select" name="selected_task" label="Select a Classification Task"> | |
| 34 <option selected="true" value="train">Train a model</option> | |
| 35 <option value="load">Load a model and predict</option> | |
| 36 </param> | |
| 37 <when value="load"> | |
| 38 <param type="data" name="infile_model" format="@MODEL@" help="Select a model file." label="Models"/> | |
| 39 <param type="data" name="infile_data" format="@DATA@" help="Select the dataset you want to classify." label="Data (tabular)"/> | |
| 40 <conditional name="prediction_options"> | |
| 41 <param type="select" name="prediction_option" label="Select the type of prediction"> | |
| 42 <option value="predict">Predict class labels</option> | |
| 43 <option value="advanced">Include advanced options</option> | |
| 44 </param> | |
| 45 <when value="predict"> | |
| 46 </when> | |
| 47 <when value="advanced"> | |
| 48 </when> | |
| 49 </conditional> | |
| 50 </when> | |
| 51 <when value="train"> | |
| 52 <param type="data" name="infile_train" format="@TRAIN@" label="Training samples (tabular)"/> | |
| 53 <conditional name="selected_algorithms"> | |
| 54 <yield/> | |
| 55 </conditional> | |
| 56 </when> | |
| 57 </conditional> | |
| 58 </xml> | |
| 59 | |
| 60 <!-- Train-or-load conditional without a training-file input: the load branch takes a model file, a dataset, a header flag and a prediction-type selector; the train branch only yields inside the selected_algorithms conditional. --> <xml name="sl_Conditional" token_model="txt" token_data="tabular" token_train="tabular"> | |
| 61 <conditional name="selected_tasks"> | |
| 62 <param type="select" name="selected_task" label="Select a Classification Task"> | |
| 63 <option selected="true" value="train">Train a model</option> | |
| 64 <option value="load">Load a model and predict</option> | |
| 65 </param> | |
| 66 <when value="load"> | |
| 67 <param type="data" name="infile_model" format="@MODEL@" help="Select a model file." label="Models"/> | |
| 68 <param type="data" name="infile_data" format="@DATA@" help="Select the dataset you want to classify." label="Data (tabular)"/> | |
| 69 <param type="boolean" name="header" checked="False" optional="True" falsevalue="boolfalse" truevalue="booltrue" label="Does the dataset contain header:"/> | |
| 70 <conditional name="prediction_options"> | |
| 71 <param type="select" name="prediction_option" label="Select the type of prediction"> | |
| 72 <option value="predict">Predict class labels</option> | |
| 73 <option value="advanced">Include advanced options</option> | |
| 74 </param> | |
| 75 <when value="predict"> | |
| 76 </when> | |
| 77 <when value="advanced"> | |
| 78 </when> | |
| 79 </conditional> | |
| 80 </when> | |
| 81 <when value="train"> | |
| 82 <conditional name="selected_algorithms"> | |
| 83 <yield/> | |
| 84 </conditional> | |
| 85 </when> | |
| 86 </conditional> | |
| 87 </xml> | |
| 88 | |
| 89 <!-- Collapsed "Advanced Options" section whose content is supplied by the caller through yield. --> <xml name="advanced_section"> | |
| 90 <section title="Advanced Options" name="options" expanded="False"> | |
| 91 <yield/> | |
| 92 </section> | |
| 93 </xml> | |
| 94 | |
| 95 | |
| 96 <!--Generalized Linear Models--> | |
| 97 <!-- Loss-function selector; the SELECT token decides whether squared_loss is preselected, and callers may yield extra options. --> <xml name="loss" token_select="false" token_help=" "> | |
| 98 <param type="select" argument="loss" help="@HELP@" label="Loss function"> | |
| 99 <option selected="@SELECT@" value="squared_loss">squared loss</option> | |
| 100 <option value="huber">huber</option> | |
| 101 <option value="epsilon_insensitive">epsilon insensitive</option> | |
| 102 <option value="squared_epsilon_insensitive">squared epsilon insensitive</option> | |
| 103 <yield/> | |
| 104 </param> | |
| 105 </xml> | |
| 106 | |
| 107 <!-- Regularization-penalty selector with l2 preselected; callers may yield extra options. --> <xml name="penalty" token_help=" "> | |
| 108 <param type="select" argument="penalty" help="@HELP@" label="Penalty (regularization term)"> | |
| 109 <option selected="true" value="l2">l2</option> | |
| 110 <option value="l1">l1</option> | |
| 111 <option value="elasticnet">elastic net</option> | |
| 112 <option value="none">none</option> | |
| 113 <yield/> | |
| 114 </param> | |
| 115 </xml> | |
| 116 | |
| 117 <!-- Float input for the Elastic Net mixing parameter; default 0.15. --> <xml name="l1_ratio" token_help=" " token_default_value="0.15"> | |
| 118 <param type="float" argument="l1_ratio" label="Elastic Net mixing parameter" value="@DEFAULT_VALUE@" help="@HELP@"/> | |
| 119 </xml> | |
| 120 | |
| 121 <!-- Float input for epsilon; default 0.1. The label names the epsilon-insensitive losses, matching the loss values listed in the help text. --> <xml name="epsilon" token_default_value="0.1" token_help="Used if loss is ‘huber’, ‘epsilon_insensitive’, or ‘squared_epsilon_insensitive’. "> | |
| 122 <param argument="epsilon" type="float" value="@DEFAULT_VALUE@" label="Epsilon (epsilon-insensitive loss functions only)" help="@HELP@"/> | |
| 123 </xml> | |
| 124 | |
| 125 <!-- Optional learning-rate schedule selector; SELECTED1 and SELECTED2 tokens preselect optimal or invscaling respectively. --> <xml name="learning_rate_s" token_selected2="false" token_selected1="false" token_help=" "> | |
| 126 <param type="select" argument="learning_rate" optional="true" help="@HELP@" label="Learning rate schedule"> | |
| 127 <option selected="@SELECTED1@" value="optimal">optimal</option> | |
| 128 <option value="constant">constant</option> | |
| 129 <option selected="@SELECTED2@" value="invscaling">inverse scaling</option> | |
| 130 <yield/> | |
| 131 </param> | |
| 132 </xml> | |
| 133 | |
| 134 <!-- Float input for the initial learning rate; default 0.0. --> <xml name="eta0" token_help="Used with ‘constant’ or ‘invscaling’ schedules. " token_default_value="0.0"> | |
| 135 <param type="float" argument="eta0" label="Initial learning rate" value="@DEFAULT_VALUE@" help="@HELP@"/> | |
| 136 </xml> | |
| 137 | |
| 138 <!-- Float input for the inverse-scaling exponent; default 0.5. --> <xml name="power_t" token_help=" " token_default_value="0.5"> | |
| 139 <param type="float" argument="power_t" label="Exponent for inverse scaling learning rate" value="@DEFAULT_VALUE@" help="@HELP@"/> | |
| 140 </xml> | |
| 141 | |
| 142 <!-- Boolean flag for normalizing samples; emits booltrue or boolfalse. Help text comes from the HELP token so a caller-supplied help is honoured (default unchanged). --> <xml name="normalize" token_checked="false" token_help=" "> | |
| 143 <param argument="normalize" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Normalize samples before training" help="@HELP@"/> | |
| 144 </xml> | |
| 145 | |
| 146 <!-- Boolean flag for working on a copy of the samples; emits booltrue or boolfalse. Help text comes from the HELP token, whose default keeps the previous fixed wording. --> <xml name="copy_X" token_checked="true" token_help="If false, samples would be overwritten. "> | |
| 147 <param argument="copy_X" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Use a copy of samples" help="@HELP@"/> | |
| 148 </xml> | |
| 149 | |
| 150 <!-- Parameter bundle: expands the normalize, alpha, fit_intercept, max_iter, tol, copy_X and random_state macros and adds a solver selector (auto preselected). --> <xml name="ridge_params"> | |
| 151 <expand macro="normalize"/> | |
| 152 <expand default_value="1.0" macro="alpha"/> | |
| 153 <expand macro="fit_intercept"/> | |
| 154 <expand default_value="" macro="max_iter"/> | |
| 155 <expand help_text="Precision of the solution. " default_value="0.001" macro="tol"/> | |
| 156 <!--class_weight--> | |
| 157 <expand macro="copy_X"/> | |
| 158 <param type="select" argument="solver" value="" help=" " label="Solver to use in the computational routines"> | |
| 159 <option selected="true" value="auto">auto</option> | |
| 160 <option value="svd">svd</option> | |
| 161 <option value="cholesky">cholesky</option> | |
| 162 <option value="lsqr">lsqr</option> | |
| 163 <option value="sparse_cg">sparse_cg</option> | |
| 164 <option value="sag">sag</option> | |
| 165 </param> | |
| 166 <expand macro="random_state"/> | |
| 167 </xml> | |
| 168 | |
| 169 <!--Ensemble methods--> | |
| 170 <!-- Optional integer input for the number of trees; default 10. --> <xml name="n_estimators" token_help=" " token_default_value="10"> | |
| 171 <param type="integer" argument="n_estimators" value="@DEFAULT_VALUE@" optional="true" help="@HELP@" label="Number of trees in the forest"/> | |
| 172 </xml> | |
| 173 | |
| 174 <!-- Optional integer input for the maximum tree depth; empty by default. --> <xml name="max_depth" token_help=" " token_default_value=""> | |
| 175 <param type="integer" argument="max_depth" value="@DEFAULT_VALUE@" optional="true" help="@HELP@" label="Maximum depth of the tree"/> | |
| 176 </xml> | |
| 177 | |
| 178 <!-- Optional input for the minimum samples needed to split a node; the param type is set by the TYPE token (integer by default), default value 2. --> <xml name="min_samples_split" token_help=" " token_default_value="2" token_type="integer"> | |
| 179 <param type="@TYPE@" argument="min_samples_split" value="@DEFAULT_VALUE@" optional="true" help="@HELP@" label="Minimum number of samples required to split an internal node"/> | |
| 180 </xml> | |
| 181 | |
| 182 <!-- Optional input for the minimum samples per leaf; the param type and label are set by tokens (integer by default), default value 1. --> <xml name="min_samples_leaf" token_help=" " token_label="Minimum number of samples in newly created leaves" token_default_value="1" token_type="integer"> | |
| 183 <param type="@TYPE@" argument="min_samples_leaf" value="@DEFAULT_VALUE@" optional="true" help="@HELP@" label="@LABEL@"/> | |
| 184 </xml> | |
| 185 | |
| 186 <!-- Optional float input for the minimum weighted fraction at a leaf; default 0.0. --> <xml name="min_weight_fraction_leaf" token_help=" " token_default_value="0.0"> | |
| 187 <param type="float" argument="min_weight_fraction_leaf" value="@DEFAULT_VALUE@" optional="true" help="@HELP@" label="Minimum weighted fraction of the input samples required to be at a leaf node"/> | |
| 188 </xml> | |
| 189 | |
| 190 <!-- Optional integer input for the maximum number of leaf nodes; empty by default. --> <xml name="max_leaf_nodes" token_help=" " token_default_value=""> | |
| 191 <param type="integer" argument="max_leaf_nodes" value="@DEFAULT_VALUE@" optional="true" help="@HELP@" label="Maximum number of leaf nodes in best-first method"/> | |
| 192 </xml> | |
| 193 | |
| 194 <!-- Optional float input for the impurity threshold that stops node splitting; default 0. --> <xml name="min_impurity_decrease" token_help=" " token_default_value="0"> | |
| 195 <param type="float" argument="min_impurity_decrease" optional="true" value="@DEFAULT_VALUE@" help="@HELP@" label="The threshold value of impurity for stopping node splitting"/> | |
| 196 </xml> | |
| 197 | |
| 198 <!-- Boolean flag for bootstrap sampling; emits booltrue or boolfalse, the same pair used by the other boolean params in this file (the false value was previously misspelled). --> <xml name="bootstrap" token_checked="true" token_help=" "> | |
| 199 <param argument="bootstrap" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Use bootstrap samples for building trees." help="@HELP@"/> | |
| 200 </xml> | |
| 201 | |
| 202 <!-- Split-quality selector with gini preselected; callers may yield extra options. Help text comes from the HELP token so a caller-supplied help is honoured (default unchanged). --> <xml name="criterion" token_help=" "> | |
| 203 <param argument="criterion" type="select" label="Function to measure the quality of a split" help="@HELP@"> | |
| 204 <option value="gini" selected="true">Gini impurity</option> | |
| 205 <option value="entropy">Information gain</option> | |
| 206 <yield/> | |
| 207 </param> | |
| 208 </xml> | |
| 209 | |
| 210 <!-- Split-quality selector offering mse and mae with no option explicitly preselected; callers may yield extra options. The declared HELP token (empty by default) is wired to the help attribute. --> <xml name="criterion2" token_help=""> | |
| 211 <param argument="criterion" type="select" label="Function to measure the quality of a split" help="@HELP@"> | |
| 212 <option value="mse">mse - mean squared error</option> | |
| 213 <option value="mae">mae - mean absolute error</option> | |
| 214 <yield/> | |
| 215 </param> | |
| 216 </xml> | |
| 217 | |
| 218 <!-- Boolean flag for out-of-bag error estimation; emits booltrue or boolfalse, unchecked by default. --> <xml name="oob_score" token_help=" " token_checked="false"> | |
| 219 <param type="boolean" argument="oob_score" checked="@CHECKED@" optional="true" falsevalue="boolfalse" truevalue="booltrue" help="@HELP@" label="Use out-of-bag samples to estimate the generalization error"/> | |
| 220 </xml> | |
| 221 | |
| 222 <!-- Conditional for max_features: auto, sqrt and log2 need no further input; number_input exposes an optional float field. --> <xml name="max_features"> | |
| 223 <conditional name="select_max_features"> | |
| 224 <param type="select" argument="max_features" label="max_features"> | |
| 225 <option selected="true" value="auto">auto - max_features=n_features</option> | |
| 226 <option value="sqrt">sqrt - max_features=sqrt(n_features)</option> | |
| 227 <option value="log2">log2 - max_features=log2(n_features)</option> | |
| 228 <option value="number_input">I want to type the number in or input None type</option> | |
| 229 </param> | |
| 230 <when value="auto"> | |
| 231 </when> | |
| 232 <when value="sqrt"> | |
| 233 </when> | |
| 234 <when value="log2"> | |
| 235 </when> | |
| 236 <when value="number_input"> | |
| 237 <param type="float" name="num_max_features" optional="true" value="" help="If int, consider the number of features at each split; If float, then max_features is a percentage and int(max_features * n_features) features are considered at each split." label="Input max_features number:"/> | |
| 238 </when> | |
| 239 </conditional> | |
| 240 </xml> | |
| 241 | |
| 242 <!-- Optional integer input for verbosity; default 0. --> <xml name="verbose" token_help="If 1 then it prints progress and performance once in a while. If greater than 1 then it prints progress and performance for every tree." token_default_value="0"> | |
| 243 <param type="integer" argument="verbose" optional="true" value="@DEFAULT_VALUE@" help="@HELP@" label="Enable verbose output"/> | |
| 244 </xml> | |
| 245 | |
| 246 <!-- Optional float input for the learning rate; default 1.0. --> <xml name="learning_rate" token_help=" " token_default_value="1.0"> | |
| 247 <param type="float" argument="learning_rate" value="@DEFAULT_VALUE@" optional="true" help="@HELP@" label="Learning rate"/> | |
| 248 </xml> | |
| 249 | |
| 250 <!-- Optional float input for the sample fraction used per base learner; fixed default 1.0. --> <xml name="subsample" token_help=" "> | |
| 251 <param type="float" argument="subsample" optional="true" value="1.0" help="@HELP@" label="The fraction of samples to be used for fitting the individual base learners"/> | |
| 252 </xml> | |
| 253 | |
| 254 <!-- Selector for presorting with auto preselected; other choices are true and false. --> <xml name="presort"> | |
| 255 <param type="select" argument="presort" label="Whether to presort the data to speed up the finding of best splits in fitting"> | |
| 256 <option selected="true" value="auto">auto</option> | |
| 257 <option value="true">true</option> | |
| 258 <option value="false">false</option> | |
| 259 </param> | |
| 260 </xml> | |
| 261 | |
| 262 <!--Parameters--> | |
| 263 <!-- Optional float input for the tolerance; default 0.0. --> <xml name="tol" token_help_text="Early stopping heuristics based on the relative center changes. Set to default (0.0) to disable this convergence detection." token_default_value="0.0"> | |
| 264 <param type="float" argument="tol" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="Tolerance"/> | |
| 265 </xml> | |
| 266 | |
| 267 <!-- Optional integer input for the number of clusters; default 8. --> <xml token_default_value="8" name="n_clusters"> | |
| 268 <param type="integer" argument="n_clusters" value="@DEFAULT_VALUE@" optional="true" help=" " label="Number of clusters"/> | |
| 269 </xml> | |
| 270 | |
| 271 <!-- Boolean flag for estimating the intercept; emits booltrue or boolfalse, checked by default. --> <xml token_checked="true" name="fit_intercept"> | |
| 272 <param type="boolean" argument="fit_intercept" checked="@CHECKED@" optional="true" falsevalue="boolfalse" truevalue="booltrue" help="If false, the data is assumed to be already centered." label="Estimate the intercept"/> | |
| 273 </xml> | |
| 274 | |
| 275 <!-- Optional integer input for the number of parallel jobs; default 1, label overridable through the LABEL token. --> <xml name="n_jobs" token_label="The number of jobs to run in parallel for both fit and predict" token_default_value="1"> | |
| 276 <param type="integer" argument="n_jobs" optional="true" value="@DEFAULT_VALUE@" help="If -1, then the number of jobs is set to the number of cores" label="@LABEL@"/> | |
| 277 </xml> | |
| 278 | |
| 279 <!-- Optional integer input for the number of iterations; default 5. --> <xml name="n_iter" token_help_text="The number of passes over the training data (aka epochs). " token_default_value="5"> | |
| 280 <param type="integer" argument="n_iter" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="Number of iterations"/> | |
| 281 </xml> | |
| 282 | |
| 283 <!-- Boolean flag for shuffling; emits booltrue or boolfalse, checked by default, label and help overridable through tokens. --> <xml name="shuffle" token_label="Shuffle data after each iteration" token_help_text=" " token_checked="true"> | |
| 284 <param type="boolean" argument="shuffle" checked="@CHECKED@" optional="true" falsevalue="boolfalse" truevalue="booltrue" help="@HELP_TEXT@" label="@LABEL@"/> | |
| 285 </xml> | |
| 286 | |
| 287 <!-- Optional integer input for the random seed; empty by default. --> <xml name="random_state" token_help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data. A fixed seed allows reproducible results." token_default_value=""> | |
| 288 <param type="integer" argument="random_state" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="Random seed number"/> | |
| 289 </xml> | |
| 290 | |
| 291 <!-- Boolean flag for warm start; emits booltrue or boolfalse, checked by default. --> <xml name="warm_start" token_help_text="When set to True, reuse the solution of the previous call to fit as initialization,otherwise, just erase the previous solution." token_checked="true"> | |
| 292 <param type="boolean" argument="warm_start" checked="@CHECKED@" optional="true" falsevalue="boolfalse" truevalue="booltrue" help="@HELP_TEXT@" label="Perform warm start"/> | |
| 293 </xml> | |
| 294 | |
| 295 <!-- Optional float input for the penalty parameter C; default 1.0. NOTE(review): a second macro named "C" is defined further down in this file; confirm which definition the macro loader keeps. --> <xml name="C" token_help_text="Penalty parameter C of the error term." token_default_value="1.0"> | |
| 296 <param type="float" argument="C" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="Penalty parameter"/> | |
| 297 </xml> | |
| 298 | |
| 299 <!--xml name="class_weight" token_default_value="" token_help_text=""> | |
| 300 <param argument="class_weight" type="" optional="true" value="@DEFAULT_VALUE@" label="" help="@HELP_TEXT@"/> | |
| 301 </xml--> | |
| 302 | |
| 303 <!-- Optional float input for the regularization coefficient; default 0.0001. --> <xml name="alpha" token_help_text="Constant that multiplies the regularization term if regularization is used. " token_default_value="0.0001"> | |
| 304 <param type="float" argument="alpha" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="Regularization coefficient"/> | |
| 305 </xml> | |
| 306 | |
| 307 <!-- Optional integer input for the number of samples; default 100. --> <xml name="n_samples" token_help_text="The total number of points equally divided among clusters." token_default_value="100"> | |
| 308 <param type="integer" argument="n_samples" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="Number of samples"/> | |
| 309 </xml> | |
| 310 | |
| 311 <!-- Optional integer input for the number of features; default 2. --> <xml name="n_features" token_help_text="Number of different numerical properties produced for each sample." token_default_value="2"> | |
| 312 <param type="integer" argument="n_features" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="Number of features"/> | |
| 313 </xml> | |
| 314 | |
| 315 <!-- Optional float input for the standard deviation of added Gaussian noise; default 0.0. --> <xml name="noise" token_help_text="Floating point number. " token_default_value="0.0"> | |
| 316 <param type="float" argument="noise" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="Standard deviation of the Gaussian noise added to the data"/> | |
| 317 </xml> | |
| 318 | |
| 319 <!-- Optional float input for the penalty parameter C; default 1.0. NOTE(review): a macro named "C" is also defined earlier in this file; confirm which definition the macro loader keeps. --> <xml name="C" token_help_text="Penalty parameter C of the error term. " token_default_value="1.0"> | |
| 320 <param type="float" argument="C" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="Penalty parameter"/> | |
| 321 </xml> | |
| 322 | |
| 323 <!-- Optional integer input for the maximum number of iterations; default 300, label and help overridable through tokens. --> <xml name="max_iter" token_help_text=" " token_label="Maximum number of iterations per single run" token_default_value="300"> | |
| 324 <param type="integer" argument="max_iter" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="@LABEL@"/> | |
| 325 </xml> | |
| 326 | |
| 327 <!-- Optional integer input for the number of runs with different centroid seeds; default 10. --> <xml token_default_value="10" name="n_init"> | |
| 328 <param type="integer" argument="n_init" value="@DEFAULT_VALUE@" optional="true" help=" " label="Number of runs with different centroid seeds"/> | |
| 329 </xml> | |
| 330 | |
| 331 <!-- Selector for the centroid initialization method: k-means++ or random, with no option explicitly preselected. --> <xml name="init"> | |
| 332 <param type="select" argument="init" help="''k-means++'' selects initial cluster centers that speed up convergence. ''random'' chooses k observations (rows) at random from data as initial centroids." label="Centroid initialization method"> | |
| 333 <option value="k-means++">k-means++</option> | |
| 334 <option value="random">random</option> | |
| 335 </param> | |
| 336 </xml> | |
| 337 | |
| 338 <!-- Optional float input for gamma; default 1.0, label and help overridable through tokens. --> <xml name="gamma" token_help_text=" " token_label="Scaling parameter" token_default_value="1.0"> | |
| 339 <param type="float" argument="gamma" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="@LABEL@"/> | |
| 340 </xml> | |
| 341 | |
| 342 <!-- Optional integer input for the polynomial degree; default 3, label and help overridable through tokens. --> <xml name="degree" token_help_text=" " token_label="Degree of the polynomial" token_default_value="3"> | |
| 343 <param type="integer" argument="degree" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="@LABEL@"/> | |
| 344 </xml> | |
| 345 | |
| 346 <!-- Optional integer input for coef0; default 1, label and help overridable through tokens. --> <xml name="coef0" token_help_text=" " token_label="Zero coefficient" token_default_value="1"> | |
| 347 <param type="integer" argument="coef0" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="@LABEL@"/> | |
| 348 </xml> | |
| 349 | |
| 350 <!-- Optional integer input for the label of the positive class; empty by default. --> <xml token_default_value="" name="pos_label"> | |
| 351 <param type="integer" argument="pos_label" value="@DEFAULT_VALUE@" optional="true" help=" " label="Label of the positive class"/> | |
| 352 </xml> | |
| 353 | |
| 354 <!-- Optional selector for the averaging type: micro, samples, macro, weighted or None; callers may yield extra options. --> <xml name="average"> | |
| 355 <param type="select" argument="average" optional="true" help=" " label="Averaging type"> | |
| 356 <option value="micro">Calculate metrics globally by counting the total true positives, false negatives and false positives. (micro)</option> | |
| 357 <option value="samples">Calculate metrics for each instance, and find their average. Only meaningful for multilabel. (samples)</option> | |
| 358 <option value="macro">Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. (macro)</option> | |
| 359 <option value="weighted">Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). This alters ‘macro’ to account for label imbalance; it can result in an F-score that is not between precision and recall. (weighted)</option> | |
| 360 <option value="None">None</option> | |
| 361 <yield/> | |
| 362 </param> | |
| 363 </xml> | |
| 364 | |
| 365 <!-- Float input for beta, the recall-versus-precision weight of the F-score; fixed default 1.0. --> <xml name="beta"> | |
| 366 <param type="float" argument="beta" label="The strength of recall versus precision in the F-score" value="1.0" help=" "/> | |
| 367 </xml> | |
| 368 | |
| 369 | |
| 370 <!--Data interface--> | |
| 371 <!-- Tabular data input plus optional start and end column pickers that reference the same file. --> <xml name="tabular_input"> | |
| 372 <param type="data" name="infile" label="Data file with numeric values" format="tabular"/> | |
| 373 <param type="data_column" name="start_column" optional="True" data_ref="infile" label="Select a subset of data. Start column:"/> | |
| 374 <param type="data_column" name="end_column" optional="True" data_ref="infile" label="End column:"/> | |
| 375 </xml> | |
| 376 | |
| 377 <!-- Two data inputs, each followed by a column picker bound to it; formats, labels, help and multiplicity come from tokens, and callers may yield further content. --> <xml name="sample_cols" token_label1="File containing true class labels:" token_label2="File containing predicted class labels:" token_multiple1="False" token_multiple2="False" token_format1="tabular" token_format2="tabular" token_help1="" token_help2=""> | |
| 378 <param type="data" name="infile1" help="@HELP1@" label="@LABEL1@" format="@FORMAT1@"/> | |
| 379 <param type="data_column" name="col1" data_ref="infile1" multiple="@MULTIPLE1@" label="Select target column(s):"/> | |
| 380 <param type="data" name="infile2" help="@HELP2@" label="@LABEL2@" format="@FORMAT2@"/> | |
| 381 <param type="data_column" name="col2" data_ref="infile2" multiple="@MULTIPLE2@" label="Select target column(s):"/> | |
| 382 <yield/> | |
| 383 </xml> | |
| 384 | |
| 385 <!-- Two tabular inputs (training samples and class labels), each with a header flag and a column picker bound to it; callers may yield further content. --> <xml name="samples_tabular" token_multiple2="False" token_multiple1="False"> | |
| 386 <param type="data" name="infile1" label="Training samples dataset:" format="tabular"/> | |
| 387 <param type="boolean" name="header1" checked="False" optional="True" falsevalue="boolfalse" truevalue="booltrue" label="Does the dataset contain header:"/> | |
| 388 <param type="data_column" name="col1" data_ref="infile1" multiple="@MULTIPLE1@" label="Select target column(s):"/> | |
| 389 <param type="data" name="infile2" label="Dataset containing class labels:" format="tabular"/> | |
| 390 <param type="boolean" name="header2" checked="False" optional="True" falsevalue="boolfalse" truevalue="booltrue" label="Does the dataset contain header:"/> | |
| 391 <param type="data_column" name="col2" data_ref="infile2" multiple="@MULTIPLE2@" label="Select target column(s):"/> | |
| 392 <yield/> | |
| 393 </xml> | |
| 394 | |
| 395 <!-- Two conditionals (true labels, predicted labels), each choosing between a tabular input with a column picker and a sparse txt input. The LABEL1 token labels the true-label inputs and the LABEL2 token labels both predicted-label inputs. --> <xml name="clf_inputs_extended" token_label1=" " token_label2=" " token_multiple="False"> | |
| 396 <conditional name="true_columns"> | |
| 397 <param name="selected_input1" type="select" label="Select the input type of true labels dataset:"> | |
| 398 <option value="tabular" selected="true">Tabular</option> | |
| 399 <option value="sparse">Sparse</option> | |
| 400 </param> | |
| 401 <when value="tabular"> | |
| 402 <param name="infile1" type="data" label="@LABEL1@"/> | |
| 403 <param name="col1" type="data_column" data_ref="infile1" label="Select the target column:"/> | |
| 404 </when> | |
| 405 <when value="sparse"> | |
| 406 <param name="infile1" type="data" format="txt" label="@LABEL1@"/> | |
| 407 </when> | |
| 408 </conditional> | |
| 409 <conditional name="predicted_columns"> | |
| 410 <param name="selected_input2" type="select" label="Select the input type of predicted labels dataset:"> | |
| 411 <option value="tabular" selected="true">Tabular</option> | |
| 412 <option value="sparse">Sparse</option> | |
| 413 </param> | |
| 414 <when value="tabular"> | |
| 415 <param name="infile2" type="data" label="@LABEL2@"/> | |
| 416 <param name="col2" multiple="@MULTIPLE@" type="data_column" data_ref="infile2" label="Select target column(s):"/> | |
| 417 </when> | |
| 418 <when value="sparse"> | |
| 419 <param name="infile2" type="data" format="txt" label="@LABEL2@"/> | |
| 420 </when> | |
| 421 </conditional> | |
| 422 </xml> | |
| 423 | |
| 424 <!-- Two tabular inputs (true labels, predicted values), each followed by a column picker bound to it; labels and multiplicity come from tokens. --> <xml name="clf_inputs" token_label1="Dataset containing true labels (tabular):" token_label2="Dataset containing predicted values (tabular):" token_multiple1="False" token_multiple="False"> | |
| 425 <param type="data" name="infile1" label="@LABEL1@" format="tabular"/> | |
| 426 <param type="data_column" name="col1" data_ref="infile1" multiple="@MULTIPLE1@" label="Select the target column:"/> | |
| 427 <param type="data" name="infile2" label="@LABEL2@" format="tabular"/> | |
| 428 <param type="data_column" name="col2" data_ref="infile2" multiple="@MULTIPLE@" label="Select target column(s):"/> | |
| 429 </xml> | |
| 430 | |
| 431 <!-- Repeat block of data inputs with at least one entry; the repeat name, maximum count, format, label and help come from tokens. --> <xml name="multiple_input" token_name="input_files" token_max_num="10" token_format="txt" token_label="Sparse matrix file (.mtx, .txt)" token_help_text="Specify a sparse matrix file in .txt format."> | |
| 432 <repeat title="Select input file(s):" name="@NAME@" max="@MAX_NUM@" min="1"> | |
| 433 <param type="data" name="input" help="@HELP_TEXT@" label="@LABEL@" format="@FORMAT@"/> | |
| 434 </repeat> | |
| 435 </xml> | |
| 436 | |
| 437 <!-- Sparse-matrix input plus a second input with a column picker bound to it; formats, labels, help and multiplicity come from tokens. --> <xml name="sparse_target" token_label1="Select a sparse matrix:" token_label2="Select the tabular containing true labels:" token_multiple="False" token_format1="txt" token_format2="tabular" token_help1="" token_help2=""> | |
| 438 <param type="data" name="infile1" help="@HELP1@" label="@LABEL1@" format="@FORMAT1@"/> | |
| 439 <param type="data" name="infile2" help="@HELP2@" label="@LABEL2@" format="@FORMAT2@"/> | |
| 440 <param type="data_column" name="col2" data_ref="infile2" multiple="@MULTIPLE@" label="Select target column(s):"/> | |
| 441 </xml> | |
| 442 | |
| 443 <!-- Input-type conditional: the tabular branch expands samples_tabular with multiple1 set to true, the sparse branch expands sparse_target. --> <xml name="sl_mixed_input"> | |
| 444 <conditional name="input_options"> | |
| 445 <param type="select" name="selected_input" label="Select input type:"> | |
| 446 <option selected="true" value="tabular">tabular data</option> | |
| 447 <option value="sparse">sparse matrix</option> | |
| 448 </param> | |
| 449 <when value="tabular"> | |
| 450 <expand multiple1="true" macro="samples_tabular"/> | |
| 451 </when> | |
| 452 <when value="sparse"> | |
| 453 <expand macro="sparse_target"/> | |
| 454 </when> | |
| 455 </conditional> | |
| 456 </xml> | |
| 457 | |
| 458 <!-- Single data input for the dataset to transform; format and help come from tokens (tabular by default). --> <xml name="multitype_input" token_format="tabular" token_help="All datasets with tabular format are supported."> | |
| 459 <param name="infile_transform" type="data" format="@FORMAT@" label="Select a dataset to transform:" help="@HELP@"/> | |
| 460 </xml> | |
| 461 | |
| 462 | |
| 463 <!--Advanced options--> | |
| 464 <!-- Collapsed "Advanced Options" section: caller content is yielded first, followed by weights and algorithm selectors and an integer leaf_size input (default 30). --> <xml name="nn_advanced_options"> | |
| 465 <section title="Advanced Options" name="options" expanded="False"> | |
| 466 <yield/> | |
| 467 <param type="select" argument="weights" help="Used in prediction." label="Weight function"> | |
| 468 <option selected="true" value="uniform">Uniform weights. All points in each neighborhood are weighted equally. (Uniform)</option> | |
| 469 <option value="distance">Weight points by the inverse of their distance. (Distance)</option> | |
| 470 </param> | |
| 471 <param type="select" argument="algorithm" help=" " label="Neighbor selection algorithm"> | |
| 472 <option selected="true" value="auto">Auto</option> | |
| 473 <option value="ball_tree">BallTree</option> | |
| 474 <option value="kd_tree">KDTree</option> | |
| 475 <option value="brute">Brute-force</option> | |
| 476 </param> | |
| 477 <param type="integer" argument="leaf_size" label="Leaf size" value="30" help="Used with BallTree and KDTree. Affects the time and memory usage of the constructed tree."/> | |
| 478 <!--param name="metric"--> | |
| 479 <!--param name="p"--> | |
| 480 <!--param name="metric_params"--> | |
| 481 </section> | |
| 482 </xml> | |
| 483 | |
| 484 <!-- Collapsed "Advanced Options" section: caller content is yielded first, followed by kernel, degree, coef0, shrinking and probability params and the tol, max_iter and random_state macros with overridden defaults and help. --> <xml name="svc_advanced_options"> | |
| 485 <section name="options" title="Advanced Options" expanded="False"> | |
| 486 <yield/> | |
| 487 <param argument="kernel" type="select" optional="true" label="Kernel type" help="Kernel type to be used in the algorithm. If none is given, ‘rbf’ will be used."> | |
| 488 <option value="rbf" selected="true">rbf</option> | |
| 489 <option value="linear">linear</option> | |
| 490 <option value="poly">poly</option> | |
| 491 <option value="sigmoid">sigmoid</option> | |
| 492 <option value="precomputed">precomputed</option> | |
| 493 </param> | |
| 494 <param argument="degree" type="integer" optional="true" value="3" label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. default : 3 "/> | |
| 495 <!--TODO: param argument="gamma" float, optional (default=’auto’) --> | |
| 496 <param argument="coef0" type="float" optional="true" value="0.0" label="Zero coefficient (polynomial and sigmoid kernels only)" | |
| 497 help="Independent term in kernel function. default: 0.0 "/> | |
| 498 <param argument="shrinking" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" | |
| 499 label="Use the shrinking heuristic" help=" "/> | |
| 500 <param argument="probability" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" | |
| 501 label="Enable probability estimates. " help="This must be enabled prior to calling fit, and will slow down that method."/> | |
| 502 <!-- param argument="cache_size"--> | |
| 503 <!--expand macro="class_weight"/--> | |
| 504 <expand macro="tol" default_value="0.001" help_text="Tolerance for stopping criterion. "/> | |
| 505 <expand macro="max_iter" default_value="-1" label="Solver maximum number of iterations" help_text="Hard limit on iterations within solver, or -1 for no limit."/> | |
| 506 <!--param argument="decision_function_shape"--> | |
| 507 <expand macro="random_state" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data for probability estimation. A fixed seed allows reproducible results."/> | |
| 508 </section> | |
| 509 </xml> | |
| 510 | |
| 511 <!-- Collapsed "Advanced Options" section: expands the n_clusters, random_state and n_init macros and adds eigen_solver, gamma, affinity, n_neighbors, assign_labels, degree and coef0 params. --> <xml name="spectral_clustering_advanced_options"> | |
| 512 <section name="options" title="Advanced Options" expanded="False"> | |
| 513 <expand macro="n_clusters"/> | |
| 514 <param argument="eigen_solver" type="select" value="" label="Eigen solver" help="The eigenvalue decomposition strategy to use."> | |
| 515 <option value="arpack" selected="true">arpack</option> | |
| 516 <option value="lobpcg">lobpcg</option> | |
| 517 <option value="amg">amg</option> | |
| 518 <!--None--> | |
| 519 </param> | |
| 520 <expand macro="random_state"/> | |
| 521 <expand macro="n_init"/> | |
| 522 <param argument="gamma" type="float" optional="true" value="1.0" label="Kernel scaling factor" help="Scaling factor of RBF, polynomial, exponential chi^2 and sigmoid affinity kernel. Ignored for affinity=''nearest_neighbors''."/> | |
| 523 <param argument="affinity" type="select" label="Affinity" help="Affinity kernel to use. "> | |
| 524 <option value="rbf" selected="true">RBF</option> | |
| 525 <option value="precomputed">precomputed</option> | |
| 526 <option value="nearest_neighbors">Nearest neighbors</option> | |
| 527 </param> | |
| 528 <param argument="n_neighbors" type="integer" optional="true" value="10" label="Number of neighbors" help="Number of neighbors to use when constructing the affinity matrix using the nearest neighbors method. Ignored for affinity=''rbf''"/> | |
| 529 <!--param argument="eigen_tol"--> | |
| 530 <param argument="assign_labels" type="select" label="Assign labels" help="The strategy to use to assign labels in the embedding space."> | |
| 531 <option value="kmeans" selected="true">kmeans</option> | |
| 532 <option value="discretize">discretize</option> | |
| 533 </param> | |
| 534 <param argument="degree" type="integer" optional="true" value="3" | |
| 535 label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. default : 3 "/> | |
| 536 <param argument="coef0" type="integer" optional="true" value="1" | |
| 537 label="Zero coefficient (polynomial and sigmoid kernels only)" help="Ignored by other kernels. default : 1 "/> | |
| 538 <!--param argument="kernel_params"--> | |
| 539 </section> | |
| 540 </xml> | |
| 541 | |
| 542 <!-- Advanced options for mini batch k-means: shared cluster/init/iteration macros plus batch size, early-stopping and re-assignment parameters. --> <xml name="minibatch_kmeans_advanced_options"> | |
| 543 <section name="options" title="Advanced Options" expanded="False"> | |
| 544 <expand macro="n_clusters"/> | |
| 545 <expand macro="init"/> | |
| 546 <expand macro="n_init" default_value="3"/> | |
| 547 <expand macro="max_iter" default_value="100"/> | |
| 548 <expand macro="tol" help_text="Early stopping heuristics based on normalized center change. To disable, set to 0.0."/> | |
| 549 <expand macro="random_state"/> | |
| 550 <param argument="batch_size" type="integer" optional="true" value="100" label="Batch size" help="Size of the mini batches."/> | |
| 551 <!--param argument="compute_labels"--> | |
| 552 <param argument="max_no_improvement" type="integer" optional="true" value="10" label="Maximum number of improvement attempts" help=" | |
| 553 Convergence detection based on inertia (the consecutive number of mini batches that do not yield an improvement on the smoothed inertia). | |
| 554 To disable, set max_no_improvement to None. "/> | |
| 555 <param argument="init_size" type="integer" optional="true" value="" label="Number of random initialization samples" help="Number of samples to randomly sample for speeding up the initialization (default: 3 * batch_size)."/> | |
| 556 <param argument="reassignment_ratio" type="float" optional="true" value="0.01" label="Re-assignment ratio" help="Controls the fraction of the maximum number of counts for a center to be reassigned. Higher values yield better clustering results."/> | |
| 557 </section> | |
| 558 </xml> | |
| 559 | |
| 560 <!-- Advanced options for k-means: shared cluster/init/iteration/tolerance macros plus the copy_x flag. --> <xml name="kmeans_advanced_options"> | |
| 561 <section name="options" title="Advanced Options" expanded="False"> | |
| 562 <expand macro="n_clusters"/> | |
| 563 <expand macro="init"/> | |
| 564 <expand macro="n_init"/> | |
| 565 <expand macro="max_iter"/> | |
| 566 <expand macro="tol" default_value="0.0001" help_text="Relative tolerance with regards to inertia to declare convergence."/> | |
| 567 <!--param argument="precompute_distances"/--> | |
| 568 <expand macro="random_state"/> | |
| 569 <param argument="copy_x" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for precomputing distances" help="Modifying the original data introduces small numerical differences caused by subtracting and then adding the data mean."/> | |
| 570 </section> | |
| 571 </xml> | |
| 572 | |
| 573 <!-- Advanced options for Birch: subcluster radius threshold, branching factor and the shared n_clusters macro with a default of 3. --> <xml name="birch_advanced_options"> | |
| 574 <section title="Advanced Options" name="options" expanded="False"> | |
| 575 <param argument="threshold" label="Subcluster radius threshold" help="The radius of the subcluster obtained by merging a new sample; the closest subcluster should be less than the threshold to avoid a new subcluster." type="float" value="0.5" optional="true"/> | |
| 576 <param argument="branching_factor" label="Maximum number of subclusters per branch" help="Maximum number of CF subclusters in each node." type="integer" value="50" optional="true"/> | |
| 577 <expand default_value="3" macro="n_clusters"/> | |
| 578 <!--param argument="compute_labels"/--> | |
| 579 </section> | |
| 580 </xml> | |
| 581 | |
| 582 <!-- Advanced options for DBSCAN: neighborhood distance, core point density, metric, neighbor search algorithm and leaf size. --> <xml name="dbscan_advanced_options"> | |
| 583 <section name="options" title="Advanced Options" expanded="False"> | |
| 584 <param argument="eps" type="float" optional="true" value="0.5" label="Maximum neighborhood distance" help="The maximum distance between two samples for them to be considered as in the same neighborhood."/> | |
| 585 <param argument="min_samples" type="integer" optional="true" value="5" label="Minimal core point density" help="The number of samples (or total weight) in a neighborhood for a point (including the point itself) to be considered as a core point."/> | |
| 586 <param argument="metric" type="text" optional="true" value="euclidean" label="Metric" help="The metric to use when calculating distance between instances in a feature array."/> | |
| 587 <param argument="algorithm" type="select" label="Pointwise distance computation algorithm" help="The algorithm to be used by the NearestNeighbors module to compute pointwise distances and find nearest neighbors."> | |
| 588 <option value="auto" selected="true">auto</option> | |
| 589 <option value="ball_tree">ball_tree</option> | |
| 590 <option value="kd_tree">kd_tree</option> | |
| 591 <option value="brute">brute</option> | |
| 592 </param> | |
| 593 <param argument="leaf_size" type="integer" optional="true" value="30" label="Leaf size" help="Leaf size passed to BallTree or cKDTree. Memory and time efficiency factor in tree construction and querying."/> | |
| 594 </section> | |
| 595 </xml> | |
| 596 | |
| 597 <!-- Conditional that lets the user pick a clustering algorithm (KMeans preselected) and expands the matching advanced options macro for each choice. --> <xml name="clustering_algorithms_options"> | |
| 598 <conditional name="algorithm_options"> | |
| 599 <param name="selected_algorithm" type="select" label="Clustering Algorithm"> | |
| 600 <option value="KMeans" selected="true">KMeans</option> | |
| 601 <option value="SpectralClustering">Spectral Clustering</option> | |
| 602 <option value="MiniBatchKMeans">Mini Batch KMeans</option> | |
| 603 <option value="DBSCAN">DBSCAN</option> | |
| 604 <option value="Birch">Birch</option> | |
| 605 </param> | |
| 606 <when value="KMeans"> | |
| 607 <expand macro="kmeans_advanced_options"/> | |
| 608 </when> | |
| 609 <when value="DBSCAN"> | |
| 610 <expand macro="dbscan_advanced_options"/> | |
| 611 </when> | |
| 612 <when value="Birch"> | |
| 613 <expand macro="birch_advanced_options"/> | |
| 614 </when> | |
| 615 <when value="SpectralClustering"> | |
| 616 <expand macro="spectral_clustering_advanced_options"/> | |
| 617 </when> | |
| 618 <when value="MiniBatchKMeans"> | |
| 619 <expand macro="minibatch_kmeans_advanced_options"/> | |
| 620 </when> | |
| 621 </conditional> | |
| 622 </xml> | |
| 623 | |
| 624 <!-- Select parameter for the distance metric (euclidean preselected); the yield lets an expanding macro append further options. --> <xml name="distance_metrics"> | |
| 625 <param argument="metric" type="select" label="Distance metric" help=" "> | |
| 626 <option value="euclidean" selected="true">euclidean</option> | |
| 627 <option value="cityblock">cityblock</option> | |
| 628 <option value="cosine">cosine</option> | |
| 629 <option value="l1">l1</option> | |
| 630 <option value="l2">l2</option> | |
| 631 <option value="manhattan">manhattan</option> | |
| 632 <yield/> | |
| 633 </param> | |
| 634 </xml> | |
| 635 | |
| 636 <!-- Bare list of additional distance metric options, without an enclosing param. NOTE(review): the name suggests these are meant for non-sparse input only; confirm against the tools that expand this macro. --> <xml name="distance_nonsparse_metrics"> | |
| 637 <option value="braycurtis">braycurtis</option> | |
| 638 <option value="canberra">canberra</option> | |
| 639 <option value="chebyshev">chebyshev</option> | |
| 640 <option value="correlation">correlation</option> | |
| 641 <option value="dice">dice</option> | |
| 642 <option value="hamming">hamming</option> | |
| 643 <option value="jaccard">jaccard</option> | |
| 644 <option value="kulsinski">kulsinski</option> | |
| 645 <option value="mahalanobis">mahalanobis</option> | |
| 646 <option value="matching">matching</option> | |
| 647 <option value="minkowski">minkowski</option> | |
| 648 <option value="rogerstanimoto">rogerstanimoto</option> | |
| 649 <option value="russellrao">russellrao</option> | |
| 650 <option value="seuclidean">seuclidean</option> | |
| 651 <option value="sokalmichener">sokalmichener</option> | |
| 652 <option value="sokalsneath">sokalsneath</option> | |
| 653 <option value="sqeuclidean">sqeuclidean</option> | |
| 654 <option value="yule">yule</option> | |
| 655 </xml> | |
| 656 | |
| 657 <!-- Select parameter for the pairwise kernel metric. Exactly one option is preselected: rbf, the first of the two options that were previously both marked selected. NOTE(review): confirm rbf is the intended default. --> <xml name="pairwise_kernel_metrics"> | |
| 658 <param argument="metric" type="select" label="Pairwise Kernel metric" help=" "> | |
| 659 <option value="rbf" selected="true">rbf</option> | |
| 660 <option value="sigmoid">sigmoid</option> | |
| 661 <option value="polynomial">polynomial</option> | |
| 662 <option value="linear">linear</option> | |
| 663 <option value="chi2">chi2</option> | |
| 664 <option value="additive_chi2">additive_chi2</option> | |
| 665 </param> | |
| 666 </xml> | |
| 667 | |
| 668 <!-- Select parameter for the pairwise metric function to compute (Euclidean distance matrix preselected); the yield lets an expanding macro append further options. --> <xml name="sparse_pairwise_metric_functions"> | |
| 669 <param name="selected_metric_function" type="select" label="Select the pairwise metric you want to compute:"> | |
| 670 <option value="euclidean_distances" selected="true">Euclidean distance matrix</option> | |
| 671 <option value="pairwise_distances">Distance matrix</option> | |
| 672 <option value="pairwise_distances_argmin">Minimum distances between one point and a set of points</option> | |
| 673 <yield/> | |
| 674 </param> | |
| 675 </xml> | |
| 676 | |
| 677 <!-- Bare list of additional pairwise metric function options (kernels and L1 distances), without an enclosing param. --> <xml name="pairwise_metric_functions"> | |
| 678 <option value="additive_chi2_kernel" >Additive chi-squared kernel</option> | |
| 679 <option value="chi2_kernel">Exponential chi-squared kernel</option> | |
| 680 <option value="linear_kernel">Linear kernel</option> | |
| 681 <option value="manhattan_distances">L1 distances</option> | |
| 682 <option value="pairwise_kernels">Kernel</option> | |
| 683 <option value="polynomial_kernel">Polynomial kernel</option> | |
| 684 <option value="rbf_kernel">Gaussian (rbf) kernel</option> | |
| 685 <option value="laplacian_kernel">Laplacian kernel</option> | |
| 686 </xml> | |
| 687 | |
| 688 <!-- When branches for the pairwise_distances and euclidean_distances choices, each with its own advanced options section; the yield inside distance_metrics forwards extra metric options from the expanding macro. --> <xml name="sparse_pairwise_condition"> | |
| 689 <when value="pairwise_distances"> | |
| 690 <section name="options" title="Advanced Options" expanded="False"> | |
| 691 <expand macro="distance_metrics"> | |
| 692 <yield/> | |
| 693 </expand> | |
| 694 </section> | |
| 695 </when> | |
| 696 <when value="euclidean_distances"> | |
| 697 <section name="options" title="Advanced Options" expanded="False"> | |
| 698 <param argument="squared" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" | |
| 699 label="Return squared Euclidean distances" help=" "/> | |
| 700 </section> | |
| 701 </when> | |
| 702 </xml> | |
| 703 | |
| 704 <!-- When branch for the pairwise_distances_argmin choice: axis, distance metric (extensible through the yield) and batch size. --> <xml name="argmin_distance_condition"> | |
| 705 <when value="pairwise_distances_argmin"> | |
| 706 <section title="Advanced Options" name="options" expanded="False"> | |
| 707 <param argument="axis" label="Axis" help="Axis along which the argmin and distances are to be computed." type="integer" value="1" optional="true"/> | |
| 708 <expand macro="distance_metrics"> | |
| 709 <yield/> | |
| 710 </expand> | |
| 711 <param argument="batch_size" label="Batch size" help="Number of rows to be processed in each batch run." type="integer" value="500" optional="true"/> | |
| 712 </section> | |
| 713 </when> | |
| 714 </xml> | |
| 715 | |
| 716 <!-- Select parameter for the preprocessor to apply (Standard Scaler preselected); the yield lets an expanding macro append further options. --> <xml name="sparse_preprocessors"> | |
| 717 <param name="selected_pre_processor" type="select" label="Select a preprocessor:"> | |
| 718 <option value="StandardScaler" selected="true">Standard Scaler (Standardizes features by removing the mean and scaling to unit variance)</option> | |
| 719 <option value="Binarizer">Binarizer (Binarizes data)</option> | |
| 720 <option value="Imputer">Imputer (Completes missing values)</option> | |
| 721 <option value="MaxAbsScaler">Max Abs Scaler (Scales features by their maximum absolute value)</option> | |
| 722 <option value="Normalizer">Normalizer (Normalizes samples individually to unit norm)</option> | |
| 723 <yield/> | |
| 724 </param> | |
| 725 </xml> | |
| 726 | |
| 727 <!-- When branches with the input selector and advanced options for each preprocessor choice (Binarizer, Imputer, StandardScaler, MaxAbsScaler, Normalizer); the trailing yield lets an expanding macro add further branches. --> <xml name="sparse_preprocessor_options"> | |
| 728 <when value="Binarizer"> | |
| 729 <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supported."/> | |
| 730 <section name="options" title="Advanced Options" expanded="False"> | |
| 731 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" | |
| 732 label="Use a copy of data for precomputing binarization" help=" "/> | |
| 733 <param argument="threshold" type="float" optional="true" value="0.0" | |
| 734 label="Threshold" | |
| 735 help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. "/> | |
| 736 </section> | |
| 737 </when> | |
| 738 <when value="Imputer"> | |
| 739 <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supported."/> | |
| 740 <section name="options" title="Advanced Options" expanded="False"> | |
| 741 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" | |
| 742 label="Use a copy of data for precomputing imputation" help=" "/> | |
| 743 <param argument="strategy" type="select" optional="true" label="Imputation strategy" help=" "> | |
| 744 <option value="mean" selected="true">Replace missing values using the mean along the axis</option> | |
| 745 <option value="median">Replace missing values using the median along the axis</option> | |
| 746 <option value="most_frequent">Replace missing using the most frequent value along the axis</option> | |
| 747 </param> | |
| 748 <param argument="missing_values" type="text" optional="true" value="NaN" | |
| 749 label="Placeholder for missing values" help="For missing values encoded as numpy.nan, use the string value “NaN”"/> | |
| 750 <param argument="axis" type="boolean" optional="true" truevalue="1" falsevalue="0" | |
| 751 label="Impute along axis = 1" help="If false, axis = 0 is selected for imputation. "/> | |
| 752 <!--param argument="axis" type="select" optional="true" label="The axis along which to impute" help=" "> | |
| 753 <option value="0" selected="true">Impute along columns</option> | |
| 754 <option value="1">Impute along rows</option> | |
| 755 </param--> | |
| 756 </section> | |
| 757 </when> | |
| 758 <when value="StandardScaler"> | |
| 759 <expand macro="multitype_input"/> | |
| 760 <section name="options" title="Advanced Options" expanded="False"> | |
| 761 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" | |
| 762 label="Use a copy of data for performing inplace scaling" help=" "/> | |
| 763 <param argument="with_mean" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" | |
| 764 label="Center the data before scaling" help=" "/> | |
| 765 <param argument="with_std" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" | |
| 766 label="Scale the data to unit variance (or unit standard deviation)" help=" "/> | |
| 767 </section> | |
| 768 </when> | |
| 769 <when value="MaxAbsScaler"> | |
| 770 <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supported."/> | |
| 771 <section name="options" title="Advanced Options" expanded="False"> | |
| 772 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" | |
| 773 label="Use a copy of data for precomputing scaling" help=" "/> | |
| 774 </section> | |
| 775 </when> | |
| 776 <when value="Normalizer"> | |
| 777 <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supported."/> | |
| 778 <section name="options" title="Advanced Options" expanded="False"> | |
| 779 <param argument="norm" type="select" optional="true" label="The norm to use to normalize non zero samples" help=" "> | |
| 780 <option value="l1" selected="true">l1</option> | |
| 781 <option value="l2">l2</option> | |
| 782 <option value="max">max</option> | |
| 783 </param> | |
| 784 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" | |
| 785 label="Use a copy of data for precomputing row normalization" help=" "/> | |
| 786 </section> | |
| 787 </when> | |
| 788 <yield/> | |
| 789 </xml> | |
| 790 <!-- Select parameter for the score function; no option is marked as preselected. --> <xml name="feature_selection_score_function"> | |
| 791 <param argument="score_func" type="select" label="Select a score function"> | |
| 792 <option value="chi2">chi2 - Compute chi-squared stats between each non-negative feature and class</option> | |
| 793 <option value="f_classif">f_classif - Compute the ANOVA F-value for the provided sample</option> | |
| 794 <option value="f_regression">f_regression - Univariate linear regression tests</option> | |
| 795 <option value="mutual_info_classif">mutual_info_classif - Estimate mutual information for a discrete target variable</option> | |
| 796 <option value="mutual_info_regression">mutual_info_regression - Estimate mutual information for a continuous target variable</option> | |
| 797 </param> | |
| 798 </xml> | |
| 799 <!-- Select parameter for the base estimator. Option values are estimator constructor expressions; double quotes inside the value attributes are written as &quot; so the attributes stay well formed. --> <xml name="feature_selection_estimator"> | |
| 800 <param argument="estimator" type="select" label="Select an estimator" help="The base estimator from which the transformer is built."> | |
| 801 <option value="svm.SVR(kernel=&quot;linear&quot;)">svm.SVR(kernel="linear")</option> | |
| 802 <option value="svm.SVC(kernel=&quot;linear&quot;)">svm.SVC(kernel="linear")</option> | |
| 803 <option value="svm.LinearSVC(penalty=&quot;l1&quot;, dual=False, tol=1e-3)">svm.LinearSVC(penalty="l1", dual=False, tol=1e-3)</option> | |
| 804 <option value="linear_model.LassoCV()">linear_model.LassoCV()</option> | |
| 805 <option value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)">ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)</option> | |
| 806 </param> | |
| 807 </xml> | |
| 808 <!-- Yes/no select asking whether the wanted estimator is among the listed ones; the yield lets an expanding macro append further options. --> <xml name="feature_selection_extra_estimator"> | |
| 809 <param name="has_estimator" type="select" label="Is your estimator on the list above?"> | |
| 810 <option value="yes">Yes, my estimator is on the list</option> | |
| 811 <option value="no">No, I need to make a new estimator</option> | |
| 812 <yield/> | |
| 813 </param> | |
| 814 </xml> | |
| 815 <!-- When branches for the yes/no values: the yes branch is empty, the no branch offers a free text field for a new estimator; the trailing yield lets an expanding macro add content. --> <xml name="feature_selection_estimator_choices"> | |
| 816 <when value="yes"> | |
| 817 </when> | |
| 818 <when value="no"> | |
| 819 <param name="new_estimator" type="text" value="" label="Make a new estimator" /> | |
| 820 </when> | |
| 821 <yield/> | |
| 822 </xml> | |
| 823 <!-- Conditional for choosing the operation: fit_transform (no extra parameters) or get_support, which adds a boolean indices flag that is unchecked by default. --> <xml name="feature_selection_methods"> | |
| 824 <conditional name="select_methods"> | |
| 825 <param name="selected_method" type="select" label="Select an operation"> | |
| 826 <option value="fit_transform">fit_transform - Fit to data, then transform it</option> | |
| 827 <option value="get_support">get_support - Get a mask, or integer index, of the features selected</option> | |
| 828 </param> | |
| 829 <when value="fit_transform"> | |
| 830 <!--**fit_params--> | |
| 831 </when> | |
| 832 <when value="get_support"> | |
| 833 <param name="indices" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Indices" help="If True, the return value will be an array of integers, rather than a boolean mask."/> | |
| 834 </when> | |
| 835 </conditional> | |
| 836 </xml> | |
| 837 | |
| 838 <!-- Outputs --> | |
| 839 | |
| 840 <!-- Output datasets: a tabular prediction file kept when the selected task is load, and a zip file kept when the selected task is train. --> <xml name="output"> | |
| 841 <outputs> | |
| 842 <data format="tabular" name="outfile_predict"> | |
| 843 <filter>selected_tasks['selected_task'] == 'load'</filter> | |
| 844 </data> | |
| 845 <data format="zip" name="outfile_fit"> | |
| 846 <filter>selected_tasks['selected_task'] == 'train'</filter> | |
| 847 </data> | |
| 848 </outputs> | |
| 849 </xml> | |
| 850 | |
| 851 | |
| 852 <!--Citations--> | |
| 853 <!-- Citations block holding a single DOI citation. NOTE(review): the macro name suggests this DOI refers to EDeN; confirm. --> <xml name="eden_citation"> | |
| 854 <citations> | |
| 855 <citation type="doi">10.5281/zenodo.15094</citation> | |
| 856 </citations> | |
| 857 </xml> | |
| 858 | |
| 859 <!-- Citations block with the BibTeX entry for the scikit-learn JMLR article; every field except the last is terminated by a comma, as BibTeX requires. --> <xml name="sklearn_citation"> | |
| 860 <citations> | |
| 861 <citation type="bibtex"> | |
| 862 @article{scikit-learn, | |
| 863 title={Scikit-learn: Machine Learning in {P}ython}, | |
| 864 author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. | |
| 865 and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. | |
| 866 and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and | |
| 867 Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.}, | |
| 868 journal={Journal of Machine Learning Research}, | |
| 869 volume={12}, | |
| 870 pages={2825--2830}, | |
| 871 year={2011}, | |
| 872 url = {https://github.com/scikit-learn/scikit-learn} | |
| 873 } | |
| 874 </citation> | |
| 875 </citations> | |
| 876 </xml> | |
| 877 | |
| 878 <!-- Citations block with the BibTeX entry for SciPy; the entry carries an explicit citation key so that BibTeX parsers that reject an empty key can read it. --> <xml name="scipy_citation"> | |
| 879 <citations> | |
| 880 <citation type="bibtex"> | |
| 881 @Misc{scipy, | |
| 882 author = {Eric Jones and Travis Oliphant and Pearu Peterson and others}, | |
| 883 title = {{SciPy}: Open source scientific tools for {Python}}, | |
| 884 year = {2001--}, | |
| 885 url = "http://www.scipy.org/", | |
| 886 note = {[Online; accessed 2016-04-09]} | |
| 887 } | |
| 888 </citation> | |
| 889 </citations> | |
| 890 </xml> | |
| 891 | |
| 892 </macros> |
