comparison sample_generator.xml @ 5:01fdef045550 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit 0e582cf1f3134c777cce3aa57d71b80ed95e6ba9
author bgruening
date Fri, 16 Feb 2018 09:12:40 -0500
parents f58c35905608
children 18df9a8cbda3
comparison
equal deleted inserted replaced
4:76f410ffac8f 5:01fdef045550
106 <when value="make_hastie_10_2"> 106 <when value="make_hastie_10_2">
107 <section name="options" title="Advanced Options" expanded="False"> 107 <section name="options" title="Advanced Options" expanded="False">
108 <expand macro="n_samples" default_value="12000"/> 108 <expand macro="n_samples" default_value="12000"/>
109 <expand macro="random_state"/> 109 <expand macro="random_state"/>
110 </section> 110 </section>
111 </when> 111 </when>
112 <when value="make_circles"> 112 <when value="make_circles">
113 <section name="options" title="Advanced Options" expanded="False"> 113 <section name="options" title="Advanced Options" expanded="False">
114 <expand macro="n_samples"/> 114 <expand macro="n_samples"/>
115 <expand macro="shuffle" label="Shuffle the samples"/> 115 <expand macro="shuffle" label="Shuffle the samples"/>
116 <expand macro="noise" default_value=""/> 116 <expand macro="noise" default_value=""/>
144 <section name="options" title="Advanced Options" expanded="False"> 144 <section name="options" title="Advanced Options" expanded="False">
145 <expand macro="n_samples"/> 145 <expand macro="n_samples"/>
146 <expand macro="n_features" default_value="10"/> 146 <expand macro="n_features" default_value="10"/>
147 <expand macro="random_state"/> 147 <expand macro="random_state"/>
148 </section> 148 </section>
149 </when> 149 </when>
150 <when value="make_friedman1"> 150 <when value="make_friedman1">
151 <section name="options" title="Advanced Options" expanded="False"> 151 <section name="options" title="Advanced Options" expanded="False">
152 <expand macro="n_samples"/> 152 <expand macro="n_samples"/>
153 <expand macro="n_features" default_value="10"/> 153 <expand macro="n_features" default_value="10"/>
154 <expand macro="noise"/> 154 <expand macro="noise"/>
155 <expand macro="random_state"/> 155 <expand macro="random_state"/>
156 </section> 156 </section>
157 </when> 157 </when>
158 <when value="make_friedman2"> 158 <when value="make_friedman2">
159 <section name="options" title="Advanced Options" expanded="False"> 159 <section name="options" title="Advanced Options" expanded="False">
160 <expand macro="n_samples"/> 160 <expand macro="n_samples"/>
161 <expand macro="noise"/> 161 <expand macro="noise"/>
162 <expand macro="random_state"/> 162 <expand macro="random_state"/>
163 </section> 163 </section>
164 </when> 164 </when>
165 <when value="make_friedman3"> 165 <when value="make_friedman3">
166 <section name="options" title="Advanced Options" expanded="False"> 166 <section name="options" title="Advanced Options" expanded="False">
167 <expand macro="n_samples"/> 167 <expand macro="n_samples"/>
168 <expand macro="noise"/> 168 <expand macro="noise"/>
169 <expand macro="random_state"/> 169 <expand macro="random_state"/>
170 </section> 170 </section>
171 </when> 171 </when>
172 <!--when value="make_low_rank_matrix"> 172 <!--when value="make_low_rank_matrix">
173 <section name="options" title="Advanced Options" expanded="False"> 173 <section name="options" title="Advanced Options" expanded="False">
174 <expand macro="n_samples"/> 174 <expand macro="n_samples"/>
175 <expand macro="n_features" default_value="100"/> 175 <expand macro="n_features" default_value="100"/>
176 <expand macro="random_state"/> 176 <expand macro="random_state"/>
177 </section> 177 </section>
178 </when--> 178 </when-->
179 <!--when value="make_multilabel_classification"> 179 <!--when value="make_multilabel_classification">
180 <section name="options" title="Advanced Options" expanded="False"> 180 <section name="options" title="Advanced Options" expanded="False">
181 <expand macro="n_samples"/> 181 <expand macro="n_samples"/>
182 <expand macro="n_features" default_value="20"/> 182 <expand macro="n_features" default_value="20"/>
183 <expand macro="random_state"/> 183 <expand macro="random_state"/>
184 </section> 184 </section>
185 </when--> 185 </when-->
186 <when value="make_s_curve"> 186 <when value="make_s_curve">
187 <section name="options" title="Advanced Options" expanded="False"> 187 <section name="options" title="Advanced Options" expanded="False">
188 <expand macro="n_samples"/> 188 <expand macro="n_samples"/>
189 <expand macro="noise"/> 189 <expand macro="noise"/>
190 <expand macro="random_state"/> 190 <expand macro="random_state"/>
194 <section name="options" title="Advanced Options" expanded="False"> 194 <section name="options" title="Advanced Options" expanded="False">
195 <expand macro="n_samples"/> 195 <expand macro="n_samples"/>
196 <expand macro="noise"/> 196 <expand macro="noise"/>
197 <expand macro="random_state"/> 197 <expand macro="random_state"/>
198 </section> 198 </section>
199 </when> 199 </when>
200 <!--when value="make_sparse_coded_signal"> 200 <!--when value="make_sparse_coded_signal">
201 <section name="options" title="Advanced Options" expanded="False"> 201 <section name="options" title="Advanced Options" expanded="False">
202 <expand macro="n_samples" default_value=""/> 202 <expand macro="n_samples" default_value=""/>
203 <expand macro="n_features" default_value=""/> 203 <expand macro="n_features" default_value=""/>
204 <expand macro="random_state"/> 204 <expand macro="random_state"/>
206 </when--> 206 </when-->
207 <!--when value="make_spd_matrix"> 207 <!--when value="make_spd_matrix">
208 <section name="options" title="Advanced Options" expanded="False"> 208 <section name="options" title="Advanced Options" expanded="False">
209 <expand macro="random_state"/> 209 <expand macro="random_state"/>
210 </section> 210 </section>
211 </when--> 211 </when-->
212 <!--when value="make_sparse_spd_matrix"> 212 <!--when value="make_sparse_spd_matrix">
213 <section name="options" title="Advanced Options" expanded="False"> 213 <section name="options" title="Advanced Options" expanded="False">
214 <expand macro="random_state"/> 214 <expand macro="random_state"/>
215 </section> 215 </section>
216 </when--> 216 </when-->
217 <!--when value="make_biclusters"> 217 <!--when value="make_biclusters">
218 <section name="options" title="Advanced Options" expanded="False"> 218 <section name="options" title="Advanced Options" expanded="False">
219 <expand macro="shuffle" label="Shuffle the samples"/> 219 <expand macro="shuffle" label="Shuffle the samples"/>
220 <expand macro="noise"/> 220 <expand macro="noise"/>
221 <expand macro="random_state"/> 221 <expand macro="random_state"/>
222 </section> 222 </section>
223 </when> 223 </when>
224 <when value="make_checkerboard"> 224 <when value="make_checkerboard">
225 <section name="options" title="Advanced Options" expanded="False"> 225 <section name="options" title="Advanced Options" expanded="False">
226 <expand macro="shuffle" label="Shuffle the samples"/> 226 <expand macro="shuffle" label="Shuffle the samples"/>
227 <expand macro="noise"/> 227 <expand macro="noise"/>
228 <expand macro="random_state"/> 228 <expand macro="random_state"/>
240 <output name="outfile" file="blobs.txt"/> 240 <output name="outfile" file="blobs.txt"/>
241 </test> 241 </test>
242 <test> 242 <test>
243 <param name="selected_generator" value="make_classification"/> 243 <param name="selected_generator" value="make_classification"/>
244 <param name="random_state" value="100"/> 244 <param name="random_state" value="100"/>
245 <output name="outfile" file="class.txt"/> 245 <output name="outfile" file="class.txt" compare="sim_size" />
246 </test> 246 </test>
247 <test> 247 <test>
248 <param name="selected_generator" value="make_circles"/> 248 <param name="selected_generator" value="make_circles"/>
249 <param name="random_state" value="100"/> 249 <param name="random_state" value="100"/>
250 <output name="outfile" file="circles.txt"/> 250 <output name="outfile" file="circles.txt"/>
280 <output name="outfile" file="moons.txt"/> 280 <output name="outfile" file="moons.txt"/>
281 </test> 281 </test>
282 <test> 282 <test>
283 <param name="selected_generator" value="make_regression"/> 283 <param name="selected_generator" value="make_regression"/>
284 <param name="random_state" value="100"/> 284 <param name="random_state" value="100"/>
285 <output name="outfile" file="regression.txt"/> 285 <output name="outfile" file="regression.txt" compare="sim_size" />
286 </test> 286 </test>
287 <test> 287 <test>
288 <param name="selected_generator" value="make_s_curve"/> 288 <param name="selected_generator" value="make_s_curve"/>
289 <param name="random_state" value="100"/> 289 <param name="random_state" value="100"/>
290 <output name="outfile" file="scurve.txt"/> 290 <output name="outfile" file="scurve.txt"/>
310 310
311 311
312 **1 - Single_label classification and clustering** 312 **1 - Single_label classification and clustering**
313 313
314 These generators produce a file containing the data samples. It is a tabular representation with samples in rows having features in columns. 314 These generators produce a file containing the data samples. It is a tabular representation with samples in rows having features in columns.
315 (In machine learning, each numerical property of a sample is called a feature.) 315 (In machine learning, each numerical property of a sample is called a feature.)
316 The corresponding discrete targets are generated in a separate column. This column is added as the last column of the data. 316 The corresponding discrete targets are generated in a separate column. This column is added as the last column of the data.
317 317
318 318
319 **Example** 319 **Example**
320 Sample data with 4 features and a single target (n_samples=8 , n_features=4) : 320 Sample data with 4 features and a single target (n_samples=8 , n_features=4) :
321 321
322 322
323 features columns 323 features columns
324 :: 324 ::
325 325
326 4.01163365529 -6.10797684314 8.29829894763 -9.10139563721 326 4.01163365529 -6.10797684314 8.29829894763 -9.10139563721
327 10.0788438916 1.59539821454 10.0684278289 4.16975127881 327 10.0788438916 1.59539821454 10.0684278289 4.16975127881
328 -5.17607775503 -0.878286135332 6.92941850665 -5.27083063186 328 -5.17607775503 -0.878286135332 6.92941850665 -5.27083063186
329 4.00975406235 -7.11847496542 9.3802423585 -9.36732159584 329 4.00975406235 -7.11847496542 9.3802423585 -9.36732159584
330 4.61204065139 -5.71217537352 9.12509610964 -9.2260804162 330 4.61204065139 -5.71217537352 9.12509610964 -9.2260804162
346 346
347 The following generators are included in this section: 347 The following generators are included in this section:
348 348
349 349
350 * **Isotropic Gaussian blobs for clustering** creates multiclass datasets by allocating each class one or more normally-distributed clusters of points (isotropic = equally distributed in all directions). It provides control regarding the centers and standard deviations of each cluster, and is used to demonstrate clustering. 350 * **Isotropic Gaussian blobs for clustering** creates multiclass datasets by allocating each class one or more normally-distributed clusters of points (isotropic = equally distributed in all directions). It provides control regarding the centers and standard deviations of each cluster, and is used to demonstrate clustering.
351 351
352 * **Random n-class classification problem** does the same, specialising in introducing noise by way of: correlated, redundant and uninformative features; multiple Gaussian clusters per class; and linear transformations of the feature space. 352 * **Random n-class classification problem** does the same, specialising in introducing noise by way of: correlated, redundant and uninformative features; multiple Gaussian clusters per class; and linear transformations of the feature space.
353 353
354 * **Isotropic Gaussian and label samples by quantile** divides a single Gaussian cluster into near-equal-size classes separated by concentric hyperspheres. 354 * **Isotropic Gaussian and label samples by quantile** divides a single Gaussian cluster into near-equal-size classes separated by concentric hyperspheres.
355 355
356 * **Data for binary classification (Hastie)** generates a binary problem similar to the above with 10 features. 356 * **Data for binary classification (Hastie)** generates a binary problem similar to the above with 10 features.
357 357
358 * **Circles** and **moons** generate 2-dimensional binary classification datasets that are challenging to certain algorithms (e.g. centroid-based clustering or linear classification), including optional Gaussian noise. They are useful for visualisation. 358 * **Circles** and **moons** generate 2-dimensional binary classification datasets that are challenging to certain algorithms (e.g. centroid-based clustering or linear classification), including optional Gaussian noise. They are useful for visualisation.
359 359
360 **2 - Generators for regression** 360 **2 - Generators for regression**
361 361
362 These generators produce output with the same format as in section 1, though aimed at regression problems. The following generators are included in this section: 362 These generators produce output with the same format as in section 1, though aimed at regression problems. The following generators are included in this section:
363 363
364 * **Random regression problem** produces regression targets as an optionally-sparse random linear combination of random features, with noise. Its informative features may be uncorrelated, or low rank (few features account for most of the variance). It can produce multiple targets for each point. 364 * **Random regression problem** produces regression targets as an optionally-sparse random linear combination of random features, with noise. Its informative features may be uncorrelated, or low rank (few features account for most of the variance). It can produce multiple targets for each point.
365 365
366 * **Random regression problem with sparse uncorrelated design** produces a target as a linear combination of four features with fixed coefficients. 366 * **Random regression problem with sparse uncorrelated design** produces a target as a linear combination of four features with fixed coefficients.
367 367
368 * **Nonlinear generators** encode explicitly non-linear relations: **“Friedman #1”** is related by polynomial and sine transforms; **“Friedman #2”** includes feature multiplication and reciprocation; and **“Friedman #3”** is similar with an arctan transformation on the target. 368 * **Nonlinear generators** encode explicitly non-linear relations: **“Friedman #1”** is related by polynomial and sine transforms; **“Friedman #2”** includes feature multiplication and reciprocation; and **“Friedman #3”** is similar with an arctan transformation on the target.
369 369
370 **3 - Generators for manifold learning** 370 **3 - Generators for manifold learning**
371 371