Mercurial > repos > bgruening > sklearn_sample_generator

diff sample_generator.xml @ 5:01fdef045550 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit 0e582cf1f3134c777cce3aa57d71b80ed95e6ba9
author: bgruening
date: Fri, 16 Feb 2018 09:12:40 -0500
parents: f58c35905608
children: 18df9a8cbda3
--- a/sample_generator.xml	Thu Jun 23 15:27:45 2016 -0400
+++ b/sample_generator.xml	Fri Feb 16 09:12:40 2018 -0500
@@ -108,7 +108,7 @@
                     <expand macro="n_samples" default_value="12000"/>
                     <expand macro="random_state"/>
                 </section>
-            </when>           
+            </when>
             <when value="make_circles">
                 <section name="options" title="Advanced Options" expanded="False">
                     <expand macro="n_samples"/>
@@ -146,7 +146,7 @@
                     <expand macro="n_features" default_value="10"/>
                     <expand macro="random_state"/>
                 </section>
-            </when>            
+            </when>
             <when value="make_friedman1">
                 <section name="options" title="Advanced Options" expanded="False">
                     <expand macro="n_samples"/>
@@ -154,35 +154,35 @@
                     <expand macro="noise"/>
                     <expand macro="random_state"/>
                 </section>
-            </when>            
+            </when>
             <when value="make_friedman2">
                 <section name="options" title="Advanced Options" expanded="False">
                     <expand macro="n_samples"/>
                     <expand macro="noise"/>
                     <expand macro="random_state"/>
                 </section>
-            </when>            
+            </when>
             <when value="make_friedman3">
                 <section name="options" title="Advanced Options" expanded="False">
                     <expand macro="n_samples"/>
                     <expand macro="noise"/>
                     <expand macro="random_state"/>
                 </section>
-            </when>            
+            </when>
             <!--when value="make_low_rank_matrix">
                 <section name="options" title="Advanced Options" expanded="False">
                     <expand macro="n_samples"/>
                     <expand macro="n_features" default_value="100"/>
                     <expand macro="random_state"/>
                 </section>
-            </when-->            
+            </when-->
             <!--when value="make_multilabel_classification">
                 <section name="options" title="Advanced Options" expanded="False">
                     <expand macro="n_samples"/>
                     <expand macro="n_features" default_value="20"/>
                     <expand macro="random_state"/>
                 </section>
-            </when-->            
+            </when-->
             <when value="make_s_curve">
                 <section name="options" title="Advanced Options" expanded="False">
                     <expand macro="n_samples"/>
@@ -196,7 +196,7 @@
                     <expand macro="noise"/>
                     <expand macro="random_state"/>
                 </section>
-            </when>            
+            </when>
             <!--when value="make_sparse_coded_signal">
                 <section name="options" title="Advanced Options" expanded="False">
                     <expand macro="n_samples" default_value=""/>
@@ -208,19 +208,19 @@
                 <section name="options" title="Advanced Options" expanded="False">
                     <expand macro="random_state"/>
                 </section>
-            </when-->            
+            </when-->
             <!--when value="make_sparse_spd_matrix">
                 <section name="options" title="Advanced Options" expanded="False">
                     <expand macro="random_state"/>
                 </section>
-            </when-->            
+            </when-->
             <!--when value="make_biclusters">
                 <section name="options" title="Advanced Options" expanded="False">
                     <expand macro="shuffle" label="Shuffle the samples"/>
                     <expand macro="noise"/>
                     <expand macro="random_state"/>
                 </section>
-            </when>            
+            </when>
             <when value="make_checkerboard">
                 <section name="options" title="Advanced Options" expanded="False">
                     <expand macro="shuffle" label="Shuffle the samples"/>
@@ -242,7 +242,7 @@
         <test>
             <param name="selected_generator" value="make_classification"/>
             <param name="random_state" value="100"/>
-            <output name="outfile" file="class.txt"/>
+            <output name="outfile" file="class.txt" compare="sim_size" />
         </test>
         <test>
             <param name="selected_generator" value="make_circles"/>
@@ -282,7 +282,7 @@
         <test>
             <param name="selected_generator" value="make_regression"/>
             <param name="random_state" value="100"/>
-            <output name="outfile" file="regression.txt"/>
+            <output name="outfile" file="regression.txt" compare="sim_size" />
         </test>
         <test>
             <param name="selected_generator" value="make_s_curve"/>
@@ -312,8 +312,8 @@
 **1 - Single_label classification and clustering**
 
  These generators produce a file containing the data samples. It is a tabular representation with samples in rows having features in columns.
- (In machine learning, each numerical property of a sample is called a feature.) 
- The corresponding discrete targets are generated in a separate column. This column is added as the last coulmn of the data. 
+ (In machine learning, each numerical property of a sample is called a feature.)
+ The corresponding discrete targets are generated in a separate column. This column is added as the last column of the data.
 
 
  **Example**
@@ -322,7 +322,7 @@
 
  features columns
  ::
- 
+
   4.01163365529    -6.10797684314    8.29829894763     -9.10139563721
   10.0788438916    1.59539821454     10.0684278289     4.16975127881
   -5.17607775503   -0.878286135332   6.92941850665     -5.27083063186
@@ -348,22 +348,22 @@
 
 
    * **Isotropic Gaussian blobs for clustering** creates multiclass datasets by allocating each class one or more normally-distributed clusters of points (isotropic = equally distributed in all directions). It provides control regarding the centers and standard deviations of each cluster, and is used to demonstrate clustering.
-   
+
    * **Random n-class classification problem** does the same specialising in introducing noise by way of: correlated, redundant and uninformative features; multiple Gaussian clusters per class; and linear transformations of the feature space.
-   
+
    * **Isotropic Gaussian and label samples by quantile** divides a single Gaussian cluster into near-equal-size classes separated by concentric hyperspheres.
-   
+
    * **Data for binary classification (Hastie)** generates a binary problem similar to the above with 10 features.
-   
+
    * **Circles** and **moons** generate 2-dimensional binary classification datasets that are challenging to certain algorithms (e.g. centroid-based clustering or linear classification), including optional Gaussian noise. They are useful for visualisation.
 
 **2 - Generators for regression**
 
  These generators produce output with the same format as in section 1, though aimed at regression problems. The following generators are included in this section:
 
-  * **Random regression problem** produces regression targets as an optionally-sparse random linear combination of random features, with noise. Its informative features may be uncorrelated, or low rank (few features account for most of the variance). It can produce multiple targets for each point. 
+  * **Random regression problem** produces regression targets as an optionally-sparse random linear combination of random features, with noise. Its informative features may be uncorrelated, or low rank (few features account for most of the variance). It can produce multiple targets for each point.
 
-  * **Random regression problem with sparse uncorrelated design** produces a target as a linear combination of four features with fixed coefficients. 
+  * **Random regression problem with sparse uncorrelated design** produces a target as a linear combination of four features with fixed coefficients.
 
   * **Nonlinear generators** encode explicitly non-linear relations: **“Friedman #1”** is related by polynomial and sine transforms; **“Friedman #2”** includes feature multiplication and reciprocation; and **“Friedman #3”** is similar with an arctan transformation on the target.
author	bgruening
date	Fri, 16 Feb 2018 09:12:40 -0500
parents	f58c35905608
children	18df9a8cbda3