Mercurial > repos > bgruening > sklearn_model_validation
comparison model_validation.xml @ 0:1e778f5614bf draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2e1e78576b38110cf5b1f2ed83b08b9c3a6cbfee
| author | bgruening |
|---|---|
| date | Sat, 28 Apr 2018 18:06:10 -0400 |
| parents | |
| children | eb4a0fccbb3f |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1e778f5614bf |
|---|---|
| 1 <tool id="sklearn_model_validation" name="Model Validation" version="@VERSION@"> | |
| 2 <description>evaluates estimator performance by cross-validation</description> | |
| 3 <macros> | |
| 4 <import>main_macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="python_requirements"/> | |
| 7 <expand macro="macro_stdio"/> | |
| 8 <version_command>echo "@VERSION@"</version_command> | |
| 9 <command> | |
| 10 <![CDATA[ | |
| 11 python "$sklearn_model_validation_script" '$inputs' | |
| 12 ]]> | |
| 13 </command> | |
| 14 <configfiles> | |
| 15 <inputs name="inputs" /> | |
| 16 <configfile name="sklearn_model_validation_script"> | |
| 17 <![CDATA[ | |
| 18 import sys | |
| 19 import json | |
| 20 import pandas | |
| 21 import pickle | |
| 22 import numpy as np | |
| 23 import sklearn.model_selection | |
| 24 from scipy.io import mmread | |
| 25 from sklearn import svm, linear_model, ensemble | |
| 26 @COLUMNS_FUNCTION@ | |
| 27 | |
| 28 input_json_path = sys.argv[1] | |
| 29 with open(input_json_path, "r") as param_handler: | |
| 30     params = json.load(param_handler) | |
| 31 input_type = params["input_options"]["selected_input"] | |
| 32 if input_type=="tabular": | |
| 33     header = 'infer' if params["input_options"]["header1"] else None | |
| 34     X = read_columns( | |
| 35         "$input_options.infile1", | |
| 36         "$input_options.col1", | |
| 37         sep='\t', | |
| 38         header=header, | |
| 39         parse_dates=True | |
| 40     ) | |
| 41 else: | |
| 42     X = mmread("$input_options.infile1") | |
| 43 ## y is always read from a tabular dataset and flattened to 1-D; mmread above is given the path so no file handle is left open. | |
| 44 header = 'infer' if params["input_options"]["header2"] else None | |
| 45 y = read_columns( | |
| 46     "$input_options.infile2", | |
| 47     "$input_options.col2", | |
| 48     sep='\t', | |
| 49     header=header, | |
| 50     parse_dates=True | |
| 51 ) | |
| 52 y = y.ravel() | |
| 53 ## Look up the selected function on sklearn.model_selection and collect its keyword options; an empty 'scoring' value is replaced by None. | |
| 54 validator = params["model_validation_functions"]["selected_function"] | |
| 55 validator = getattr(sklearn.model_selection, validator) | |
| 56 options = params["model_validation_functions"]["options"] | |
| 57 if 'scoring' in options and options['scoring'] == '': | |
| 58 options['scoring'] = None | |
| 59 ## NOTE(security): the estimator expression comes from the tool parameters and is passed to eval() after restoring the __dq__/__sq__ quote placeholders; it can run arbitrary Python. | |
| 60 estimator=params["model_validation_functions"]["estimator"] | |
| 61 if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == 'no': | |
| 62 estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"] | |
| 63 estimator = eval(estimator.replace('__dq__', '"').replace("__sq__","'")) | |
| 64 ## Only the branch matching the selected function is rendered into the script. NOTE(security): 'train_sizes' and 'param_range' are text parameters that are passed to eval() below. | |
| 65 #if $model_validation_functions.selected_function == 'cross_validate': | |
| 66 res = validator(estimator, X, y, **options) | |
| 67 rval = res["$model_validation_functions.return_type"] | |
| 68 | |
| 69 #elif $model_validation_functions.selected_function == 'learning_curve': | |
| 70 options['train_sizes'] = eval(options['train_sizes']) | |
| 71 train_sizes_abs, train_scores, test_scores = validator(estimator, X, y, **options) | |
| 72 rval = eval("$model_validation_functions.return_type") | |
| 73 | |
| 74 #elif $model_validation_functions.selected_function == 'permutation_test_score': | |
| 75 score, permutation_scores, pvalue = validator(estimator, X, y, **options) | |
| 76 rval = eval("$model_validation_functions.return_type") | |
| 77 if "$model_validation_functions.return_type" in ["score", "pvalue"]: | |
| 78 rval = [rval] | |
| 79 | |
| 80 #elif $model_validation_functions.selected_function == 'validation_curve': | |
| 81 options['param_range'] = eval(options['param_range']) | |
| 82 train_scores, test_scores = validator(estimator, X, y, **options) | |
| 83 rval = eval("$model_validation_functions.return_type") | |
| 84 | |
| 85 #else: | |
| 86 rval = validator(estimator, X, y, **options) | |
| 87 #end if | |
| 88 ## Write the selected result to the output dataset as a tab-separated table without header or index. | |
| 89 rval = pandas.DataFrame(rval) | |
| 90 rval.to_csv(path_or_buf="$outfile", sep='\t', header=False, index=False) | |
| 91 | |
| 92 ]]> | |
| 93 </configfile> | |
| 94 </configfiles> | |
| 95 <inputs> | |
| 96 <conditional name="model_validation_functions"> | |
| 97 <param name="selected_function" type="select" label="Select a model validation function"> | |
| 98 <option value="cross_validate">cross_validate - Evaluate metric(s) by cross-validation and also record fit/score times</option> | |
| 99 <option value="cross_val_predict">cross_val_predict - Generate cross-validated estimates for each input data point</option> | |
| 100 <option value="cross_val_score">cross_val_score - Evaluate a score by cross-validation</option> | |
| 101 <option value="learning_curve">learning_curve - Learning curve</option> | |
| 102 <option value="permutation_test_score">permutation_test_score - Evaluate the significance of a cross-validated score with permutations</option> | |
| 103 <option value="validation_curve">validation_curve - Validation curve</option> | |
| 104 </param> | |
| 105 <when value="cross_validate"> | |
| 106 <expand macro="feature_selection_estimator" /> | |
| 107 <conditional name="extra_estimator"> | |
| 108 <expand macro="feature_selection_extra_estimator" /> | |
| 109 <expand macro="feature_selection_estimator_choices" /> | |
| 110 </conditional> | |
| 111 <section name="options" title="Other Options" expanded="false"> | |
| 112 <!--groups--> | |
| 113 <expand macro="model_validation_common_options"/> | |
| 114 <expand macro="scoring"/> | |
| 115 <!--fit_params--> | |
| 116 <expand macro="pre_dispatch"/> | |
| 117 </section> | |
| 118 <param name="return_type" type="select" label="Select a return type"> | |
| 119 <option value="test_score" selected="true">test_score</option> | |
| 120 <option value="train_score">train_score</option> | |
| 121 <option value="fit_time">fit_time</option> | |
| 122 <option value="score_time">score_time</option> | |
| 123 </param> | |
| 124 </when> | |
| 125 <when value="cross_val_predict"> | |
| 126 <expand macro="feature_selection_estimator" /> | |
| 127 <conditional name="extra_estimator"> | |
| 128 <expand macro="feature_selection_extra_estimator" /> | |
| 129 <expand macro="feature_selection_estimator_choices" /> | |
| 130 </conditional> | |
| 131 <section name="options" title="Other Options" expanded="false"> | |
| 132 <!--groups--> | |
| 133 <param argument="cv" type="integer" value="" optional="true" label="cv" help="The number of folds in a (Stratified)KFold" /> | |
| 134 <expand macro="n_jobs"/> | |
| 135 <expand macro="verbose"/> | |
| 136 <!--fit_params--> | |
| 137 <param argument="pre_dispatch" type="integer" value="" optional="true" label="pre_dispatch" help="Controls the number of jobs that get dispatched during parallel execution" /> | |
| 138 <param argument="method" type="select" label="Invokes the passed method name of the passed estimator"> | |
| 139 <option value="predict" selected="true">predict</option> | |
| 140 <option value="predict_proba">predict_proba</option> | |
| 141 </param> | |
| 142 </section> | |
| 143 </when> | |
| 144 <when value="cross_val_score"> | |
| 145 <expand macro="feature_selection_estimator" /> | |
| 146 <conditional name="extra_estimator"> | |
| 147 <expand macro="feature_selection_extra_estimator" /> | |
| 148 <expand macro="feature_selection_estimator_choices" /> | |
| 149 </conditional> | |
| 150 <section name="options" title="Other Options" expanded="false"> | |
| 151 <!--groups--> | |
| 152 <expand macro="model_validation_common_options"/> | |
| 153 <expand macro="scoring"/> | |
| 154 <!--fit_params--> | |
| 155 <expand macro="pre_dispatch"/> | |
| 156 </section> | |
| 157 </when> | |
| 158 <when value="learning_curve"> | |
| 159 <expand macro="feature_selection_estimator" /> | |
| 160 <conditional name="extra_estimator"> | |
| 161 <expand macro="feature_selection_extra_estimator" /> | |
| 162 <expand macro="feature_selection_estimator_choices" /> | |
| 163 </conditional> | |
| 164 <section name="options" title="Other Options" expanded="false"> | |
| 165 <!--groups--> | |
| 166 <expand macro="model_validation_common_options"/> | |
| 167 <param argument="train_sizes" type="text" value="np.linspace(0.1, 1.0, 5)" label="train_sizes" help="Relative or absolute numbers of training examples that will be used to generate the learning curve"/> | |
| 168 <expand macro="scoring"/> | |
| 169 <param argument="exploit_incremental_learning" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="exploit_incremental_learning" help="Whether to apply incremental learning to speed up fitting of the estimator if supported"/> | |
| 170 <expand macro="pre_dispatch"/> | |
| 171 <expand macro="shuffle" checked="false" label="shuffle" help="Whether to shuffle training data before taking prefixes"/> | |
| 172 <expand macro="random_state"/> | |
| 173 </section> | |
| 174 <param name="return_type" type="select" label="Select a return type"> | |
| 175 <option value="train_sizes_abs" selected="true">train_sizes_abs</option> | |
| 176 <option value="train_scores">train_scores</option> | |
| 177 <option value="test_scores">test_scores</option> | |
| 178 </param> | |
| 179 </when> | |
| 180 <when value="permutation_test_score"> | |
| 181 <expand macro="feature_selection_estimator" /> | |
| 182 <conditional name="extra_estimator"> | |
| 183 <expand macro="feature_selection_extra_estimator" /> | |
| 184 <expand macro="feature_selection_estimator_choices" /> | |
| 185 </conditional> | |
| 186 <section name="options" title="Other Options" expanded="false"> | |
| 187 <!--groups--> | |
| 188 <expand macro="model_validation_common_options"/> | |
| 189 <expand macro="scoring"/> | |
| 190 <param name="n_permutations" type="integer" value="100" optional="true" label="n_permutations" help="Number of times to permute y"/> | |
| 191 <expand macro="random_state"/> | |
| 192 </section> | |
| 193 <param name="return_type" type="select" label="Select a return type"> | |
| 194 <option value="score" selected="true">score</option> | |
| 195 <option value="permutation_scores">permutation_scores</option> | |
| 196 <option value="pvalue">pvalue</option> | |
| 197 </param> | |
| 198 </when> | |
| 199 <when value="validation_curve"> | |
| 200 <expand macro="feature_selection_estimator" /> | |
| 201 <conditional name="extra_estimator"> | |
| 202 <expand macro="feature_selection_extra_estimator" /> | |
| 203 <expand macro="feature_selection_estimator_choices" /> | |
| 204 </conditional> | |
| 205 <section name="options" title="Other Options" expanded="false"> | |
| 206 <param name="param_name" type="text" value="gamma" label="param_name" help="Name of the parameter that will be varied"/> | |
| 207 <param name="param_range" type="text" value="np.logspace(-6, -1, 5)" label="param_range" help="The values of the parameter that will be evaluated."/> | |
| 208 <!--groups--> | |
| 209 <expand macro="model_validation_common_options"/> | |
| 210 <expand macro="scoring"/> | |
| 211 <expand macro="pre_dispatch"/> | |
| 212 </section> | |
| 213 <param name="return_type" type="select" label="Select a return type"> | |
| 214 <option value="train_scores" selected="true">train_scores</option> | |
| 215 <option value="test_scores">test_scores</option> | |
| 216 </param> | |
| 217 </when> | |
| 218 </conditional> | |
| 219 <expand macro="sl_mixed_input"/> | |
| 220 </inputs> | |
| 221 <outputs> | |
| 222 <data format="tabular" name="outfile"/> | |
| 223 </outputs> | |
| 224 <tests> | |
| 225 <test> | |
| 226 <param name="selected_function" value="cross_validate"/> | |
| 227 <param name="estimator" value="linear_model.LassoCV()"/> | |
| 228 <param name="has_estimator" value="yes"/> | |
| 229 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> | |
| 230 <param name="col1" value="1,2,3,4,5"/> | |
| 231 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> | |
| 232 <param name="col2" value="6"/> | |
| 233 <output name="outfile" file="mv_result01.tabular"/> | |
| 234 </test> | |
| 235 <test> | |
| 236 <param name="selected_function" value="cross_val_predict"/> | |
| 237 <param name="estimator" value="linear_model.LassoCV()"/> | |
| 238 <param name="has_estimator" value="yes"/> | |
| 239 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> | |
| 240 <param name="col1" value="1,2,3,4,5"/> | |
| 241 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> | |
| 242 <param name="col2" value="6"/> | |
| 243 <output name="outfile" file="mv_result02.tabular"/> | |
| 244 </test> | |
| 245 <test> | |
| 246 <param name="selected_function" value="cross_val_score"/> | |
| 247 <param name="estimator" value="linear_model.LassoCV()"/> | |
| 248 <param name="has_estimator" value="yes"/> | |
| 249 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> | |
| 250 <param name="col1" value="1,2,3,4,5"/> | |
| 251 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> | |
| 252 <param name="col2" value="6"/> | |
| 253 <output name="outfile" file="mv_result03.tabular"/> | |
| 254 </test> | |
| 255 <test> | |
| 256 <param name="selected_function" value="learning_curve"/> | |
| 257 <param name="estimator" value="linear_model.LassoCV()"/> | |
| 258 <param name="has_estimator" value="yes"/> | |
| 259 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | |
| 260 <param name="header1" value="true" /> | |
| 261 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> | |
| 262 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | |
| 263 <param name="header2" value="true" /> | |
| 264 <param name="col2" value="1"/> | |
| 265 <output name="outfile" file="mv_result04.tabular"/> | |
| 266 </test> | |
| 267 <test> | |
| 268 <param name="selected_function" value="permutation_test_score"/> | |
| 269 <param name="estimator" value="linear_model.LassoCV()"/> | |
| 270 <param name="has_estimator" value="yes"/> | |
| 271 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> | |
| 272 <param name="col1" value="1,2,3,4,5"/> | |
| 273 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> | |
| 274 <param name="col2" value="6"/> | |
| 275 <output name="outfile" file="mv_result05.tabular"/> | |
| 276 </test> | |
| 277 <test> <!-- validation_curve run with an SVC estimator; quotes inside the estimator attribute value are XML-escaped --> | |
| 278 <param name="selected_function" value="validation_curve"/> | |
| 279 <param name="estimator" value="svm.SVC(kernel=&quot;linear&quot;)"/> | |
| 280 <param name="has_estimator" value="yes"/> | |
| 281 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | |
| 282 <param name="header1" value="true" /> | |
| 283 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> | |
| 284 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | |
| 285 <param name="header2" value="true" /> | |
| 286 <param name="col2" value="1"/> | |
| 287 <param name="return_type" value="test_scores"/> | |
| 288 <output name="outfile" file="mv_result06.tabular"/> | |
| 289 </test> | |
| 290 </tests> | |
| 291 <help> | |
| 292 <![CDATA[ | |
| 293 **What it does** | |
| 294 This tool provides model validation functions that evaluate estimator performance using cross-validation. It is based on the | |
| 295 sklearn.model_selection package. | |
| 296 For information about classification metric functions and their parameter settings please refer to `Scikit-learn classification metrics`_. | |
| 297 | |
| 298 .. _`Scikit-learn classification metrics`: http://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics | |
| 299 ]]> | |
| 300 </help> | |
| 301 <expand macro="sklearn_citation"/> | |
| 302 </tool> |
