Mercurial > repos > bgruening > sklearn_sample_generator
comparison sample_generator.xml @ 5:01fdef045550 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit 0e582cf1f3134c777cce3aa57d71b80ed95e6ba9
| author | bgruening |
|---|---|
| date | Fri, 16 Feb 2018 09:12:40 -0500 |
| parents | f58c35905608 |
| children | 18df9a8cbda3 |
comparison
equal
deleted
inserted
replaced
| 4:76f410ffac8f | 5:01fdef045550 |
|---|---|
| 106 <when value="make_hastie_10_2"> | 106 <when value="make_hastie_10_2"> |
| 107 <section name="options" title="Advanced Options" expanded="False"> | 107 <section name="options" title="Advanced Options" expanded="False"> |
| 108 <expand macro="n_samples" default_value="12000"/> | 108 <expand macro="n_samples" default_value="12000"/> |
| 109 <expand macro="random_state"/> | 109 <expand macro="random_state"/> |
| 110 </section> | 110 </section> |
| 111 </when> | 111 </when> |
| 112 <when value="make_circles"> | 112 <when value="make_circles"> |
| 113 <section name="options" title="Advanced Options" expanded="False"> | 113 <section name="options" title="Advanced Options" expanded="False"> |
| 114 <expand macro="n_samples"/> | 114 <expand macro="n_samples"/> |
| 115 <expand macro="shuffle" label="Shuffle the samples"/> | 115 <expand macro="shuffle" label="Shuffle the samples"/> |
| 116 <expand macro="noise" default_value=""/> | 116 <expand macro="noise" default_value=""/> |
| 144 <section name="options" title="Advanced Options" expanded="False"> | 144 <section name="options" title="Advanced Options" expanded="False"> |
| 145 <expand macro="n_samples"/> | 145 <expand macro="n_samples"/> |
| 146 <expand macro="n_features" default_value="10"/> | 146 <expand macro="n_features" default_value="10"/> |
| 147 <expand macro="random_state"/> | 147 <expand macro="random_state"/> |
| 148 </section> | 148 </section> |
| 149 </when> | 149 </when> |
| 150 <when value="make_friedman1"> | 150 <when value="make_friedman1"> |
| 151 <section name="options" title="Advanced Options" expanded="False"> | 151 <section name="options" title="Advanced Options" expanded="False"> |
| 152 <expand macro="n_samples"/> | 152 <expand macro="n_samples"/> |
| 153 <expand macro="n_features" default_value="10"/> | 153 <expand macro="n_features" default_value="10"/> |
| 154 <expand macro="noise"/> | 154 <expand macro="noise"/> |
| 155 <expand macro="random_state"/> | 155 <expand macro="random_state"/> |
| 156 </section> | 156 </section> |
| 157 </when> | 157 </when> |
| 158 <when value="make_friedman2"> | 158 <when value="make_friedman2"> |
| 159 <section name="options" title="Advanced Options" expanded="False"> | 159 <section name="options" title="Advanced Options" expanded="False"> |
| 160 <expand macro="n_samples"/> | 160 <expand macro="n_samples"/> |
| 161 <expand macro="noise"/> | 161 <expand macro="noise"/> |
| 162 <expand macro="random_state"/> | 162 <expand macro="random_state"/> |
| 163 </section> | 163 </section> |
| 164 </when> | 164 </when> |
| 165 <when value="make_friedman3"> | 165 <when value="make_friedman3"> |
| 166 <section name="options" title="Advanced Options" expanded="False"> | 166 <section name="options" title="Advanced Options" expanded="False"> |
| 167 <expand macro="n_samples"/> | 167 <expand macro="n_samples"/> |
| 168 <expand macro="noise"/> | 168 <expand macro="noise"/> |
| 169 <expand macro="random_state"/> | 169 <expand macro="random_state"/> |
| 170 </section> | 170 </section> |
| 171 </when> | 171 </when> |
| 172 <!--when value="make_low_rank_matrix"> | 172 <!--when value="make_low_rank_matrix"> |
| 173 <section name="options" title="Advanced Options" expanded="False"> | 173 <section name="options" title="Advanced Options" expanded="False"> |
| 174 <expand macro="n_samples"/> | 174 <expand macro="n_samples"/> |
| 175 <expand macro="n_features" default_value="100"/> | 175 <expand macro="n_features" default_value="100"/> |
| 176 <expand macro="random_state"/> | 176 <expand macro="random_state"/> |
| 177 </section> | 177 </section> |
| 178 </when--> | 178 </when--> |
| 179 <!--when value="make_multilabel_classification"> | 179 <!--when value="make_multilabel_classification"> |
| 180 <section name="options" title="Advanced Options" expanded="False"> | 180 <section name="options" title="Advanced Options" expanded="False"> |
| 181 <expand macro="n_samples"/> | 181 <expand macro="n_samples"/> |
| 182 <expand macro="n_features" default_value="20"/> | 182 <expand macro="n_features" default_value="20"/> |
| 183 <expand macro="random_state"/> | 183 <expand macro="random_state"/> |
| 184 </section> | 184 </section> |
| 185 </when--> | 185 </when--> |
| 186 <when value="make_s_curve"> | 186 <when value="make_s_curve"> |
| 187 <section name="options" title="Advanced Options" expanded="False"> | 187 <section name="options" title="Advanced Options" expanded="False"> |
| 188 <expand macro="n_samples"/> | 188 <expand macro="n_samples"/> |
| 189 <expand macro="noise"/> | 189 <expand macro="noise"/> |
| 190 <expand macro="random_state"/> | 190 <expand macro="random_state"/> |
| 194 <section name="options" title="Advanced Options" expanded="False"> | 194 <section name="options" title="Advanced Options" expanded="False"> |
| 195 <expand macro="n_samples"/> | 195 <expand macro="n_samples"/> |
| 196 <expand macro="noise"/> | 196 <expand macro="noise"/> |
| 197 <expand macro="random_state"/> | 197 <expand macro="random_state"/> |
| 198 </section> | 198 </section> |
| 199 </when> | 199 </when> |
| 200 <!--when value="make_sparse_coded_signal"> | 200 <!--when value="make_sparse_coded_signal"> |
| 201 <section name="options" title="Advanced Options" expanded="False"> | 201 <section name="options" title="Advanced Options" expanded="False"> |
| 202 <expand macro="n_samples" default_value=""/> | 202 <expand macro="n_samples" default_value=""/> |
| 203 <expand macro="n_features" default_value=""/> | 203 <expand macro="n_features" default_value=""/> |
| 204 <expand macro="random_state"/> | 204 <expand macro="random_state"/> |
| 206 </when--> | 206 </when--> |
| 207 <!--when value="make_spd_matrix"> | 207 <!--when value="make_spd_matrix"> |
| 208 <section name="options" title="Advanced Options" expanded="False"> | 208 <section name="options" title="Advanced Options" expanded="False"> |
| 209 <expand macro="random_state"/> | 209 <expand macro="random_state"/> |
| 210 </section> | 210 </section> |
| 211 </when--> | 211 </when--> |
| 212 <!--when value="make_sparse_spd_matrix"> | 212 <!--when value="make_sparse_spd_matrix"> |
| 213 <section name="options" title="Advanced Options" expanded="False"> | 213 <section name="options" title="Advanced Options" expanded="False"> |
| 214 <expand macro="random_state"/> | 214 <expand macro="random_state"/> |
| 215 </section> | 215 </section> |
| 216 </when--> | 216 </when--> |
| 217 <!--when value="make_biclusters"> | 217 <!--when value="make_biclusters"> |
| 218 <section name="options" title="Advanced Options" expanded="False"> | 218 <section name="options" title="Advanced Options" expanded="False"> |
| 219 <expand macro="shuffle" label="Shuffle the samples"/> | 219 <expand macro="shuffle" label="Shuffle the samples"/> |
| 220 <expand macro="noise"/> | 220 <expand macro="noise"/> |
| 221 <expand macro="random_state"/> | 221 <expand macro="random_state"/> |
| 222 </section> | 222 </section> |
| 223 </when> | 223 </when> |
| 224 <when value="make_checkerboard"> | 224 <when value="make_checkerboard"> |
| 225 <section name="options" title="Advanced Options" expanded="False"> | 225 <section name="options" title="Advanced Options" expanded="False"> |
| 226 <expand macro="shuffle" label="Shuffle the samples"/> | 226 <expand macro="shuffle" label="Shuffle the samples"/> |
| 227 <expand macro="noise"/> | 227 <expand macro="noise"/> |
| 228 <expand macro="random_state"/> | 228 <expand macro="random_state"/> |
| 240 <output name="outfile" file="blobs.txt"/> | 240 <output name="outfile" file="blobs.txt"/> |
| 241 </test> | 241 </test> |
| 242 <test> | 242 <test> |
| 243 <param name="selected_generator" value="make_classification"/> | 243 <param name="selected_generator" value="make_classification"/> |
| 244 <param name="random_state" value="100"/> | 244 <param name="random_state" value="100"/> |
| 245 <output name="outfile" file="class.txt"/> | 245 <output name="outfile" file="class.txt" compare="sim_size" /> |
| 246 </test> | 246 </test> |
| 247 <test> | 247 <test> |
| 248 <param name="selected_generator" value="make_circles"/> | 248 <param name="selected_generator" value="make_circles"/> |
| 249 <param name="random_state" value="100"/> | 249 <param name="random_state" value="100"/> |
| 250 <output name="outfile" file="circles.txt"/> | 250 <output name="outfile" file="circles.txt"/> |
| 280 <output name="outfile" file="moons.txt"/> | 280 <output name="outfile" file="moons.txt"/> |
| 281 </test> | 281 </test> |
| 282 <test> | 282 <test> |
| 283 <param name="selected_generator" value="make_regression"/> | 283 <param name="selected_generator" value="make_regression"/> |
| 284 <param name="random_state" value="100"/> | 284 <param name="random_state" value="100"/> |
| 285 <output name="outfile" file="regression.txt"/> | 285 <output name="outfile" file="regression.txt" compare="sim_size" /> |
| 286 </test> | 286 </test> |
| 287 <test> | 287 <test> |
| 288 <param name="selected_generator" value="make_s_curve"/> | 288 <param name="selected_generator" value="make_s_curve"/> |
| 289 <param name="random_state" value="100"/> | 289 <param name="random_state" value="100"/> |
| 290 <output name="outfile" file="scurve.txt"/> | 290 <output name="outfile" file="scurve.txt"/> |
| 310 | 310 |
| 311 | 311 |
| 312 **1 - Single_label classification and clustering** | 312 **1 - Single_label classification and clustering** |
| 313 | 313 |
| 314 These generators produce a file containing the data samples. It is a tabular representation with samples in rows having features in columns. | 314 These generators produce a file containing the data samples. It is a tabular representation with samples in rows having features in columns. |
| 315 (In machine learning, each numerical property of a sample is called a feature.) | 315 (In machine learning, each numerical property of a sample is called a feature.) |
| 316 The corresponding discrete targets are generated in a separate column. This column is added as the last column of the data. | 316 The corresponding discrete targets are generated in a separate column. This column is added as the last column of the data. |
| 317 | 317 |
| 318 | 318 |
| 319 **Example** | 319 **Example** |
| 320 Sample data with 4 features and a single target (n_samples=8 , n_features=4) : | 320 Sample data with 4 features and a single target (n_samples=8 , n_features=4) : |
| 321 | 321 |
| 322 | 322 |
| 323 features columns | 323 features columns |
| 324 :: | 324 :: |
| 325 | 325 |
| 326 4.01163365529 -6.10797684314 8.29829894763 -9.10139563721 | 326 4.01163365529 -6.10797684314 8.29829894763 -9.10139563721 |
| 327 10.0788438916 1.59539821454 10.0684278289 4.16975127881 | 327 10.0788438916 1.59539821454 10.0684278289 4.16975127881 |
| 328 -5.17607775503 -0.878286135332 6.92941850665 -5.27083063186 | 328 -5.17607775503 -0.878286135332 6.92941850665 -5.27083063186 |
| 329 4.00975406235 -7.11847496542 9.3802423585 -9.36732159584 | 329 4.00975406235 -7.11847496542 9.3802423585 -9.36732159584 |
| 330 4.61204065139 -5.71217537352 9.12509610964 -9.2260804162 | 330 4.61204065139 -5.71217537352 9.12509610964 -9.2260804162 |
| 346 | 346 |
| 347 The following generators are included in this section: | 347 The following generators are included in this section: |
| 348 | 348 |
| 349 | 349 |
| 350 * **Isotropic Gaussian blobs for clustering** creates multiclass datasets by allocating each class one or more normally-distributed clusters of points (isotropic = equally distributed in all directions). It provides control regarding the centers and standard deviations of each cluster, and is used to demonstrate clustering. | 350 * **Isotropic Gaussian blobs for clustering** creates multiclass datasets by allocating each class one or more normally-distributed clusters of points (isotropic = equally distributed in all directions). It provides control regarding the centers and standard deviations of each cluster, and is used to demonstrate clustering. |
| 351 | 351 |
| 352 * **Random n-class classification problem** does the same, specialising in introducing noise by way of: correlated, redundant and uninformative features; multiple Gaussian clusters per class; and linear transformations of the feature space. | 352 * **Random n-class classification problem** does the same, specialising in introducing noise by way of: correlated, redundant and uninformative features; multiple Gaussian clusters per class; and linear transformations of the feature space. |
| 353 | 353 |
| 354 * **Isotropic Gaussian and label samples by quantile** divides a single Gaussian cluster into near-equal-size classes separated by concentric hyperspheres. | 354 * **Isotropic Gaussian and label samples by quantile** divides a single Gaussian cluster into near-equal-size classes separated by concentric hyperspheres. |
| 355 | 355 |
| 356 * **Data for binary classification (Hastie)** generates a binary problem similar to the above with 10 features. | 356 * **Data for binary classification (Hastie)** generates a binary problem similar to the above with 10 features. |
| 357 | 357 |
| 358 * **Circles** and **moons** generate 2-dimensional binary classification datasets that are challenging to certain algorithms (e.g. centroid-based clustering or linear classification), including optional Gaussian noise. They are useful for visualisation. | 358 * **Circles** and **moons** generate 2-dimensional binary classification datasets that are challenging to certain algorithms (e.g. centroid-based clustering or linear classification), including optional Gaussian noise. They are useful for visualisation. |
| 359 | 359 |
| 360 **2 - Generators for regression** | 360 **2 - Generators for regression** |
| 361 | 361 |
| 362 These generators produce output with the same format as in section 1, though aimed at regression problems. The following generators are included in this section: | 362 These generators produce output with the same format as in section 1, though aimed at regression problems. The following generators are included in this section: |
| 363 | 363 |
| 364 * **Random regression problem** produces regression targets as an optionally-sparse random linear combination of random features, with noise. Its informative features may be uncorrelated, or low rank (few features account for most of the variance). It can produce multiple targets for each point. | 364 * **Random regression problem** produces regression targets as an optionally-sparse random linear combination of random features, with noise. Its informative features may be uncorrelated, or low rank (few features account for most of the variance). It can produce multiple targets for each point. |
| 365 | 365 |
| 366 * **Random regression problem with sparse uncorrelated design** produces a target as a linear combination of four features with fixed coefficients. | 366 * **Random regression problem with sparse uncorrelated design** produces a target as a linear combination of four features with fixed coefficients. |
| 367 | 367 |
| 368 * **Nonlinear generators** encode explicitly non-linear relations: **“Friedman #1”** is related by polynomial and sine transforms; **“Friedman #2”** includes feature multiplication and reciprocation; and **“Friedman #3”** is similar with an arctan transformation on the target. | 368 * **Nonlinear generators** encode explicitly non-linear relations: **“Friedman #1”** is related by polynomial and sine transforms; **“Friedman #2”** includes feature multiplication and reciprocation; and **“Friedman #3”** is similar with an arctan transformation on the target. |
| 369 | 369 |
| 370 **3 - Generators for manifold learning** | 370 **3 - Generators for manifold learning** |
| 371 | 371 |
