Mercurial > repos > bgruening > sklearn_data_preprocess
comparison pre_process.xml @ 0:12b2bef577d0 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a349cb4673231f12344e418513a08691925565d9
| author | bgruening |
|---|---|
| date | Fri, 03 Jun 2016 13:56:11 -0400 |
| parents | |
| children | 43075be4044b |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:12b2bef577d0 |
|---|---|
| 1 <tool id="sklearn_data_preprocess" name="Preprocess" version="@VERSION@"> | |
| 2 <description>raw feature vectors into standardized datasets</description> | |
| 3 <macros> | |
| 4 <import>main_macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="python_requirements"/> | |
| 7 <expand macro="macro_stdio"/> | |
| 8 <version_command>echo "@VERSION@"</version_command> | |
| 9 <command> | |
| 10 <![CDATA[ | |
| 11 python "$pre_processor_script" '$inputs' | |
| 12 ]]> | |
| 13 </command> | |
| 14 <configfiles> | |
| 15 <inputs name="inputs" /> | |
| 16 <configfile name="pre_processor_script"> | |
| 17 <![CDATA[ | |
| 18 import sys | |
| 19 import json | |
| 20 import pandas | |
| 21 import pickle | |
| 22 import numpy as np | |
| 23 from scipy.io import mmread | |
| 24 from scipy.io import mmwrite | |
| 25 from sklearn import preprocessing | |
| 26 ## Fit a sklearn.preprocessing estimator on the training input and apply it to the file to transform. | |
| 27 input_json_path = sys.argv[1] | |
| 28 with open(input_json_path, "r") as handle: params = json.load(handle) | |
| 29 ## Training data X: Matrix Market when the input type is sparse, otherwise headerless tab-separated text. | |
| 30 #if $input_type.selected_input_type == "sparse": | |
| 31 X = mmread("$infile") | |
| 32 #else: | |
| 33 X = pandas.read_csv("$infile", sep='\t', header=None, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False ) | |
| 34 #end if | |
| 35 ## Data to transform (y) is read from infile_transform, not from the training file; its extension selects the reader. | |
| 36 #if $input_type.pre_processors.infile_transform.ext == 'txt': | |
| 37 y = mmread("$input_type.pre_processors.infile_transform") | |
| 38 #else: | |
| 39 y = pandas.read_csv("$input_type.pre_processors.infile_transform", sep='\t', header=None, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False ) | |
| 40 #end if | |
| 41 ## The JSON dump of the tool inputs provides the estimator class name and its keyword options. | |
| 42 preprocessor = params["input_type"]["pre_processors"]["selected_pre_processor"] | |
| 43 options = params["input_type"]["pre_processors"]["options"] | |
| 44 ## Look up the class in sklearn.preprocessing, fit it on X, then transform y. | |
| 45 my_class = getattr(preprocessing, preprocessor) | |
| 46 estimator = my_class(**options) | |
| 47 estimator.fit(X) | |
| 48 result = estimator.transform(y) | |
| 49 ## mmwrite gets a binary file object rather than a path because scipy appends ".mtx" to path arguments. | |
| 50 #if $input_type.pre_processors.infile_transform.ext == 'txt': | |
| 51 with open("$outfile_transform", 'wb') as handle: mmwrite(handle, result) | |
| 52 #else: | |
| 53 res = pandas.DataFrame(result) | |
| 54 res.to_csv(path_or_buf = "$outfile_transform", sep="\t", index=False, header=None) | |
| 55 #end if | |
| 56 ## Pickle output is bytes, so the optional estimator dump uses a binary-mode handle that is closed on exit. | |
| 57 #if $save: | |
| 58 with open("$outfile_fit", 'wb') as handle: pickle.dump(estimator, handle, pickle.HIGHEST_PROTOCOL) | |
| 59 #end if | |
| 60 ]]> | |
| 61 </configfile> | |
| 62 </configfiles> | |
| 63 <inputs> | |
| 64 <conditional name="input_type"> | |
| 65 <param name="selected_input_type" type="select" label="Select the type of your input data:"> | |
| 66 <option value="tabular" selected="true">Tabular</option> | |
| 67 <option value="sparse">Sparse</option> | |
| 68 </param> | |
| 69 <when value="tabular"> | |
| 70 <param name="infile" type="data" format="tabular" label="Select a tabular file you want to train your preprocessor on its data:"/> | |
| 71 <conditional name="pre_processors"> | |
| 72 <expand macro="sparse_preprocessors"> | |
| 73 <option value="KernelCenterer">Kernel Centerer (Centers a kernel matrix)</option> | |
| 74 <option value="MinMaxScaler">Minmax Scaler (Scales features to a range)</option> | |
| 75 <option value="PolynomialFeatures">Polynomial Features (Generates polynomial and interaction features)</option> | |
| 76 <option value="RobustScaler">Robust Scaler (Scales features using outlier-invariance statistics)</option> | |
| 77 </expand> | |
| 78 <expand macro="sparse_preprocessor_options"> | |
| 79 <when value="KernelCenterer"> | |
| 80 <expand macro="multitype_input"/> | |
| 81 <section name="options" title="Advanced Options" expanded="False"> | |
| 82 </section> | |
| 83 </when> | |
| 84 <when value="MinMaxScaler"> | |
| 85 <expand macro="multitype_input"/> | |
| 86 <section name="options" title="Advanced Options" expanded="False"> | |
| 87 <!--feature_range--> | |
| 88 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for precomputing normalization" help=" "/> | |
| 89 </section> | |
| 90 </when> | |
| 91 <when value="PolynomialFeatures"> | |
| 92 <expand macro="multitype_input"/> | |
| 93 <section name="options" title="Advanced Options" expanded="False"> | |
| 94 <param argument="degree" type="integer" optional="true" value="2" label="The degree of the polynomial features " help=""/> | |
| 95 <param argument="interaction_only" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Produce interaction features only" help="(Features that are products of at most degree distinct input features) "/> | |
| 96 <param argument="include_bias" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Include a bias column" help="Feature in which all polynomial powers are zero "/> | |
| 97 </section> | |
| 98 </when> | |
| 99 <when value="RobustScaler"> | |
| 100 <expand macro="multitype_input"/> | |
| 101 <section name="options" title="Advanced Options" expanded="False"> | |
| 102 <!--=True, =True, copy=True--> | |
| 103 <param argument="with_centering" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Center the data before scaling" help=" "/> | |
| 104 <param argument="with_scaling" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Scale the data to interquartile range" help=" "/> | |
| 105 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for inplace scaling" help=" "/> | |
| 106 </section> | |
| 107 </when> | |
| 108 </expand> | |
| 109 </conditional> | |
| 110 </when> | |
| 111 <when value="sparse"> | |
| 112 <param name="infile" type="data" format="txt" label="Select a sparse representation you want to train your preprocessor on its data:"/> | |
| 113 <conditional name="pre_processors"> | |
| 114 <expand macro="sparse_preprocessors"/> | |
| 115 <expand macro="sparse_preprocessor_options"/> | |
| 116 </conditional> | |
| 117 </when> | |
| 118 </conditional> | |
| 119 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Save the preprocessor" help="Saves the preprocessor after fitting to the data. The preprocessor can then be passed to other tools and used in later operations."/> | |
| 120 </inputs> | |
| 121 <outputs> | |
| 122 <data format="tabular" name="outfile_transform" from_work_dir="./output"/> | |
| 123 <data format="zip" name="outfile_fit"> | |
| 124 <filter>save</filter> | |
| 125 </data> | |
| 126 </outputs> | |
| 127 <tests> | |
| 128 <test> | |
| 129 <param name="infile" value="train.tabular" ftype="tabular"/> | |
| 130 <param name="infile_transform" value="train.tabular" ftype="tabular"/> | |
| 131 <param name="selected_input_type" value="tabular"/> | |
| 132 <param name="selected_pre_processor" value="KernelCenterer"/> | |
| 133 <param name="save" value="true"/> | |
| 134 <output name="outfile_transform" file="prp_result01" ftype="tabular"/> | |
| 135 <output name="outfile_fit" file="prp_model01" ftype="zip" compare="sim_size" delta="500"/> | |
| 136 </test> | |
| 137 <test> | |
| 138 <param name="infile" value="train.tabular" ftype="tabular"/> | |
| 139 <param name="infile_transform" value="train.tabular" ftype="tabular"/> | |
| 140 <param name="selected_input_type" value="tabular"/> | |
| 141 <param name="selected_pre_processor" value="MinMaxScaler"/> | |
| 142 <param name="save" value="true"/> | |
| 143 <output name="outfile_transform" file="prp_result02" ftype="tabular"/> | |
| 144 <output name="outfile_fit" file="prp_model02" ftype="zip" compare="sim_size" delta="500"/> | |
| 145 </test> | |
| 146 <test> | |
| 147 <param name="infile" value="train.tabular" ftype="tabular"/> | |
| 148 <param name="infile_transform" value="train.tabular" ftype="tabular"/> | |
| 149 <param name="selected_input_type" value="tabular"/> | |
| 150 <param name="selected_pre_processor" value="PolynomialFeatures"/> | |
| 151 <param name="save" value="true"/> | |
| 152 <output name="outfile_transform" file="prp_result03" ftype="tabular"/> | |
| 153 <output name="outfile_fit" file="prp_model03" ftype="zip" compare="sim_size" delta="500"/> | |
| 154 </test> | |
| 155 <test> | |
| 156 <param name="infile" value="train.tabular" ftype="tabular"/> | |
| 157 <param name="infile_transform" value="train.tabular" ftype="tabular"/> | |
| 158 <param name="selected_input_type" value="tabular"/> | |
| 159 <param name="selected_pre_processor" value="RobustScaler"/> | |
| 160 <param name="save" value="true"/> | |
| 161 <output name="outfile_transform" file="prp_result04" ftype="tabular"/> | |
| 162 <output name="outfile_fit" file="prp_model04" ftype="zip" compare="sim_size" delta="500"/> | |
| 163 </test> | |
| 164 <test> | |
| 165 <param name="infile" value="csr_sparse2.mtx" ftype="txt"/> | |
| 166 <param name="infile_transform" value="csr_sparse2.mtx" ftype="txt"/> | |
| 167 <param name="selected_input_type" value="sparse"/> | |
| 168 <param name="selected_pre_processor" value="Binarizer"/> | |
| 169 <param name="save" value="true"/> | |
| 170 <output name="outfile_transform" file="prp_result05" ftype="tabular"/> | |
| 171 <output name="outfile_fit" file="prp_model05" ftype="zip" compare="sim_size" delta="500"/> | |
| 172 </test> | |
| 173 <test> | |
| 174 <param name="infile" value="csr_sparse2.mtx" ftype="txt"/> | |
| 175 <param name="infile_transform" value="csr_sparse2.mtx" ftype="txt"/> | |
| 176 <param name="selected_input_type" value="sparse"/> | |
| 177 <param name="selected_pre_processor" value="Imputer"/> | |
| 178 <param name="save" value="true"/> | |
| 179 <param name="axis" value="true"/> | |
| 180 <output name="outfile_transform" file="prp_result06" ftype="tabular"/> | |
| 181 <output name="outfile_fit" file="prp_model06" ftype="zip" compare="sim_size" delta="500"/> | |
| 182 </test> | |
| 183 <test> | |
| 184 <param name="infile" value="train.tabular" ftype="tabular"/> | |
| 185 <param name="infile_transform" value="train.tabular" ftype="tabular"/> | |
| 186 <param name="selected_input_type" value="tabular"/> | |
| 187 <param name="selected_pre_processor" value="StandardScaler"/> | |
| 188 <param name="save" value="true"/> | |
| 189 <output name="outfile_transform" file="prp_result07" ftype="tabular"/> | |
| 190 <output name="outfile_fit" file="prp_model07" ftype="zip" compare="sim_size" delta="500"/> | |
| 191 </test> | |
| 192 <test> | |
| 193 <param name="infile" value="csr_sparse2.mtx" ftype="txt"/> | |
| 194 <param name="infile_transform" value="csr_sparse2.mtx" ftype="txt"/> | |
| 195 <param name="selected_input_type" value="sparse"/> | |
| 196 <param name="selected_pre_processor" value="MaxAbsScaler"/> | |
| 197 <param name="save" value="true"/> | |
| 198 <output name="outfile_transform" file="prp_result08" ftype="tabular"/> | |
| 199 <output name="outfile_fit" file="prp_model08" ftype="zip" compare="sim_size" delta="500"/> | |
| 200 </test> | |
| 201 <test> | |
| 202 <param name="infile" value="csr_sparse2.mtx" ftype="txt"/> | |
| 203 <param name="infile_transform" value="csr_sparse2.mtx" ftype="txt"/> | |
| 204 <param name="selected_input_type" value="sparse"/> | |
| 205 <param name="selected_pre_processor" value="Normalizer"/> | |
| 206 <param name="save" value="true"/> | |
| 207 <output name="outfile_transform" file="prp_result09" ftype="tabular"/> | |
| 208 <output name="outfile_fit" file="prp_model09" ftype="zip" compare="sim_size" delta="500"/> | |
| 209 </test> | |
| 210 </tests> | |
| 211 <help> | |
| 212 <![CDATA[ | |
| 213 **What it does** | |
| 214 | |
| 215 This tool provides several transformer classes that convert raw feature vectors into a representation that is more suitable for downstream estimators. The classes are provided by the sklearn.preprocessing package. | |
| 216 | |
| 217 For information about preprocessing classes and parameter settings please refer to `Scikit-learn preprocessing`_. | |
| 218 | |
| 219 .. _`Scikit-learn preprocessing`: http://scikit-learn.org/stable/modules/preprocessing.html | |
| 220 ]]> | |
| 221 </help> | |
| 222 <expand macro="sklearn_citation"/> | |
| 223 </tool> |
