Mercurial > repos > bgruening > sklearn_ensemble
comparison ensemble.xml @ 9:e4fcbbc81083 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit 35fa73d6e9ba8f0789ddfb743d893d950a68af02
| author | bgruening |
|---|---|
| date | Tue, 10 Apr 2018 15:15:18 -0400 |
| parents | ea8b1c89c20b |
| children | 923ecece9e9c |
comparison
equal
deleted
inserted
replaced
| 8:ea8b1c89c20b | 9:e4fcbbc81083 |
|---|---|
| 29 | 29 |
| 30 #if $selected_tasks.selected_task == "train": | 30 #if $selected_tasks.selected_task == "train": |
| 31 | 31 |
| 32 algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"] | 32 algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"] |
| 33 options = params["selected_tasks"]["selected_algorithms"]["options"] | 33 options = params["selected_tasks"]["selected_algorithms"]["options"] |
| | 34 if "select_max_features" in options: |
| | 35 if options["select_max_features"]["max_features"] == "number_input": |
| | 36 options["select_max_features"]["max_features"] = options["select_max_features"]["num_max_features"] |
| | 37 options["select_max_features"].pop("num_max_features") |
| | 38 options["max_features"] = options["select_max_features"]["max_features"] |
| | 39 options.pop("select_max_features") |
| | 40 if "presort" in options: |
| | 41 if options["presort"] == "true": |
| | 42 options["presort"] = True |
| | 43 if options["presort"] == "false": |
| | 44 options["presort"] = False |
| | 45 if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0: |
| | 46 options["min_samples_leaf"] = 1 |
| | 47 if "min_samples_split" in options and options["min_samples_split"] > 1.0: |
| | 48 options["min_samples_split"] = int(options["min_samples_split"]) |
| 34 input_type = params["selected_tasks"]["selected_algorithms"]["input_options"]["selected_input"] | 49 input_type = params["selected_tasks"]["selected_algorithms"]["input_options"]["selected_input"] |
| 35 if input_type=="tabular": | 50 if input_type=="tabular": |
| 36 header = 'infer' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header1"] else None | 51 header = 'infer' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header1"] else None |
| 37 X = read_columns( | 52 X = read_columns( |
| 38 "$selected_tasks.selected_algorithms.input_options.infile1", | 53 "$selected_tasks.selected_algorithms.input_options.infile1", |
| 50 "$selected_tasks.selected_algorithms.input_options.col2", | 65 "$selected_tasks.selected_algorithms.input_options.col2", |
| 51 sep='\t', | 66 sep='\t', |
| 52 header=header, | 67 header=header, |
| 53 parse_dates=True | 68 parse_dates=True |
| 54 ) | 69 ) |
| | 70 y=y.ravel() |
| 55 | 71 |
| 56 my_class = getattr(sklearn.ensemble, algorithm) | 72 my_class = getattr(sklearn.ensemble, algorithm) |
| 57 estimator = my_class(**options) | 73 estimator = my_class(**options) |
| 58 estimator.fit(X,y) | 74 estimator.fit(X,y) |
| 59 pickle.dump(estimator,open("$outfile_fit", 'w+'), pickle.HIGHEST_PROTOCOL) | 75 pickle.dump(estimator,open("$outfile_fit", 'w+'), pickle.HIGHEST_PROTOCOL) |
| 60 | 76 |
| 61 #else: | 77 #else: |
| 62 classifier_object = pickle.load(open("$selected_tasks.infile_model", 'r')) | 78 classifier_object = pickle.load(open("$selected_tasks.infile_model", 'r')) |
| 63 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) | 79 header = 'infer' if params["selected_tasks"]["header"] else None |
| | 80 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) |
| 64 prediction = classifier_object.predict(data) | 81 prediction = classifier_object.predict(data) |
| 65 prediction_df = pandas.DataFrame(prediction) | 82 prediction_df = pandas.DataFrame(prediction) |
| 66 res = pandas.concat([data, prediction_df], axis=1) | 83 res = pandas.concat([data, prediction_df], axis=1) |
| 67 res.to_csv(path_or_buf = "$outfile_predict", sep="\t", index=False) | 84 res.to_csv(path_or_buf = "$outfile_predict", sep="\t", index=False) |
| 68 #end if | 85 #end if |
| 73 <inputs> | 90 <inputs> |
| 74 <expand macro="sl_Conditional" model="zip"> | 91 <expand macro="sl_Conditional" model="zip"> |
| 75 <param name="selected_algorithm" type="select" label="Select an ensemble method:"> | 92 <param name="selected_algorithm" type="select" label="Select an ensemble method:"> |
| 76 <option value="RandomForestClassifier" selected="true">Random forest classifier</option> | 93 <option value="RandomForestClassifier" selected="true">Random forest classifier</option> |
| 77 <option value="AdaBoostClassifier">Ada boost classifier</option> | 94 <option value="AdaBoostClassifier">Ada boost classifier</option> |
| | 95 <option value="GradientBoostingClassifier">Gradient Boosting Classifier</option> |
| 78 <option value="RandomForestRegressor">Random forest regressor</option> | 96 <option value="RandomForestRegressor">Random forest regressor</option> |
| 79 <option value="AdaBoostRegressor">Ada boost regressor</option> | 97 <option value="AdaBoostRegressor">Ada boost regressor</option> |
| | 98 <option value="GradientBoostingRegressor">Gradient Boosting Regressor</option> |
| 80 </param> | 99 </param> |
| 81 <when value="RandomForestClassifier"> | 100 <when value="RandomForestClassifier"> |
| 82 <expand macro="sl_mixed_input"/> | 101 <expand macro="sl_mixed_input"/> |
| 83 <section name="options" title="Advanced Options" expanded="False"> | 102 <section name="options" title="Advanced Options" expanded="False"> |
| 84 <expand macro="n_estimators"/> | 103 <expand macro="n_estimators"/> |
| 89 <expand macro="min_samples_leaf"/> | 108 <expand macro="min_samples_leaf"/> |
| 90 <expand macro="min_weight_fraction_leaf"/> | 109 <expand macro="min_weight_fraction_leaf"/> |
| 91 <expand macro="max_leaf_nodes"/> | 110 <expand macro="max_leaf_nodes"/> |
| 92 <expand macro="bootstrap"/> | 111 <expand macro="bootstrap"/> |
| 93 <expand macro="warm_start" checked="false"/> | 112 <expand macro="warm_start" checked="false"/> |
| | 113 <expand macro="n_jobs"/> |
| 94 <expand macro="random_state"/> | 114 <expand macro="random_state"/> |
| 95 <expand macro="oob_score"/> | 115 <expand macro="oob_score"/> |
| 96 <!--class_weight=None--> | 116 <!--class_weight=None--> |
| 97 </section> | 117 </section> |
| 98 </when> | 118 </when> |
| 107 <option value="SAMME">SAMME</option> | 127 <option value="SAMME">SAMME</option> |
| 108 </param> | 128 </param> |
| 109 <expand macro="random_state"/> | 129 <expand macro="random_state"/> |
| 110 </section> | 130 </section> |
| 111 </when> | 131 </when> |
| | 132 <when value="GradientBoostingClassifier"> |
| | 133 <expand macro="sl_mixed_input"/> |
| | 134 <section name="options" title="Advanced Options" expanded="False"> |
| | 135 <!--base_estimator=None--> |
| | 136 <param argument="loss" type="select" label="Loss function"> |
| | 137 <option value="deviance" selected="true">deviance - logistic regression with probabilistic outputs</option> |
| | 138 <option value="exponential">exponential - gradient boosting recovers the AdaBoost algorithm</option> |
| | 139 </param> |
| | 140 <expand macro="learning_rate" default_value='0.1'/> |
| | 141 <expand macro="n_estimators" default_value="100" help="The number of boosting stages to perform"/> |
| | 142 <expand macro="max_depth" default_value="3" help="maximum depth of the individual regression estimators"/> |
| | 143 <expand macro="criterion2"> |
| | 144 <option value="friedman_mse" selected="true">friedman_mse - mean squared error with improvement score by Friedman</option> |
| | 145 </expand> |
| | 146 <expand macro="min_samples_split" type="float"/> |
| | 147 <expand macro="min_samples_leaf" type="float" label="The minimum number of samples required to be at a leaf node"/> |
| | 148 <expand macro="min_weight_fraction_leaf"/> |
| | 149 <expand macro="subsample"/> |
| | 150 <expand macro="max_features"/> |
| | 151 <expand macro="max_leaf_nodes"/> |
| | 152 <expand macro="min_impurity_decrease"/> |
| | 153 <expand macro="verbose"/> |
| | 154 <expand macro="warm_start" checked="false"/> |
| | 155 <expand macro="random_state"/> |
| | 156 <expand macro="presort"/> |
| | 157 </section> |
| | 158 </when> |
| 112 <when value="RandomForestRegressor"> | 159 <when value="RandomForestRegressor"> |
| 113 <expand macro="sl_mixed_input"/> | 160 <expand macro="sl_mixed_input"/> |
| 114 <section name="options" title="Advanced Options" expanded="False"> | 161 <section name="options" title="Advanced Options" expanded="False"> |
| 115 <expand macro="n_estimators"/> | 162 <expand macro="n_estimators"/> |
| | 163 <expand macro="criterion2"/> |
| 116 <expand macro="max_features"/> | 164 <expand macro="max_features"/> |
| 117 <expand macro="max_depth"/> | 165 <expand macro="max_depth"/> |
| 118 <expand macro="min_samples_split"/> | 166 <expand macro="min_samples_split"/> |
| 119 <expand macro="min_samples_leaf"/> | 167 <expand macro="min_samples_leaf"/> |
| 120 <expand macro="min_weight_fraction_leaf"/> | 168 <expand macro="min_weight_fraction_leaf"/> |
| 121 <expand macro="max_leaf_nodes"/> | 169 <expand macro="max_leaf_nodes"/> |
| | 170 <expand macro="min_impurity_decrease"/> |
| 122 <expand macro="bootstrap"/> | 171 <expand macro="bootstrap"/> |
| | 172 <expand macro="oob_score"/> |
| | 173 <expand macro="n_jobs"/> |
| | 174 <expand macro="random_state"/> |
| | 175 <expand macro="verbose"/> |
| 123 <expand macro="warm_start" checked="false"/> | 176 <expand macro="warm_start" checked="false"/> |
| 124 <expand macro="random_state"/> | |
| 125 <expand macro="oob_score"/> | |
| 126 </section> | 177 </section> |
| 127 </when> | 178 </when> |
| 128 <when value="AdaBoostRegressor"> | 179 <when value="AdaBoostRegressor"> |
| 129 <expand macro="sl_mixed_input"/> | 180 <expand macro="sl_mixed_input"/> |
| 130 <section name="options" title="Advanced Options" expanded="False"> | 181 <section name="options" title="Advanced Options" expanded="False"> |
| 137 <option value="exponential">exponential</option> | 188 <option value="exponential">exponential</option> |
| 138 </param> | 189 </param> |
| 139 <expand macro="random_state"/> | 190 <expand macro="random_state"/> |
| 140 </section> | 191 </section> |
| 141 </when> | 192 </when> |
| | 193 <when value="GradientBoostingRegressor"> |
| | 194 <expand macro="sl_mixed_input"/> |
| | 195 <section name="options" title="Advanced Options" expanded="False"> |
| | 196 <param argument="loss" type="select" label="Loss function"> |
| | 197 <option value="ls" selected="true">ls - least squares regression</option> |
| | 198 <option value="lad">lad - least absolute deviation</option> |
| | 199 <option value="huber">huber - combination of least squares regression and least absolute deviation</option> |
| | 200 <option value="quantile">quantile - use alpha to specify the quantile</option> |
| | 201 </param> |
| | 202 <expand macro="learning_rate" default_value="0.1"/> |
| | 203 <expand macro="n_estimators" default_value="100" help="The number of boosting stages to perform"/> |
| | 204 <expand macro="max_depth" default_value="3" help="maximum depth of the individual regression estimators"/> |
| | 205 <expand macro="criterion2"> |
| | 206 <option value="friedman_mse" selected="true">friedman_mse - mean squared error with improvement score by Friedman</option> |
| | 207 </expand> |
| | 208 <expand macro="min_samples_split" type="float"/> |
| | 209 <expand macro="min_samples_leaf" type="float" label="The minimum number of samples required to be at a leaf node"/> |
| | 210 <expand macro="min_weight_fraction_leaf"/> |
| | 211 <expand macro="subsample"/> |
| | 212 <expand macro="max_features"/> |
| | 213 <expand macro="max_leaf_nodes"/> |
| | 214 <expand macro="min_impurity_decrease"/> |
| | 215 <param argument="alpha" type="float" value="0.9" label="alpha" help="The alpha-quantile of the huber loss function and the quantile loss function" /> |
| | 216 <!--base_estimator=None--> |
| | 217 <expand macro="verbose"/> |
| | 218 <expand macro="warm_start" checked="false"/> |
| | 219 <expand macro="random_state"/> |
| | 220 <expand macro="presort"/> |
| | 221 </section> |
| | 222 </when> |
| 142 </expand> | 223 </expand> |
| 143 </inputs> | 224 </inputs> |
| 144 | 225 |
| 145 <expand macro="output"/> | 226 <expand macro="output"/> |
| 146 | 227 |
| 159 <param name="infile_model" value="rfc_model01" ftype="zip"/> | 240 <param name="infile_model" value="rfc_model01" ftype="zip"/> |
| 160 <param name="infile_data" value="test.tabular" ftype="tabular"/> | 241 <param name="infile_data" value="test.tabular" ftype="tabular"/> |
| 161 <param name="selected_task" value="load"/> | 242 <param name="selected_task" value="load"/> |
| 162 <output name="outfile_predict" file="rfc_result01" compare="sim_size" delta="500"/> | 243 <output name="outfile_predict" file="rfc_result01" compare="sim_size" delta="500"/> |
| 163 </test> | 244 </test> |
| 164 | |
| 165 <test> | 245 <test> |
| 166 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> | 246 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> |
| 167 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> | 247 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> |
| 168 <param name="col1" value="1,2,3,4,5"/> | 248 <param name="col1" value="1,2,3,4,5"/> |
| 169 <param name="col2" value="6"/> | 249 <param name="col2" value="6"/> |
| 175 <test> | 255 <test> |
| 176 <param name="infile_model" value="rfr_model01" ftype="zip"/> | 256 <param name="infile_model" value="rfr_model01" ftype="zip"/> |
| 177 <param name="infile_data" value="regression_test.tabular" ftype="tabular"/> | 257 <param name="infile_data" value="regression_test.tabular" ftype="tabular"/> |
| 178 <param name="selected_task" value="load"/> | 258 <param name="selected_task" value="load"/> |
| 179 <output name="outfile_predict" file="rfr_result01" compare="sim_size" delta="500"/> | 259 <output name="outfile_predict" file="rfr_result01" compare="sim_size" delta="500"/> |
| | 260 </test> |
| | 261 <test> |
| | 262 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
| | 263 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
| | 264 <param name="header1" value="True"/> |
| | 265 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> |
| | 266 <param name="header2" value="True"/> |
| | 267 <param name="col2" value="1"/> |
| | 268 <param name="selected_task" value="train"/> |
| | 269 <param name="selected_algorithm" value="GradientBoostingRegressor"/> |
| | 270 <param name="max_features" value="number_input"/> |
| | 271 <param name="num_max_features" value=""/> |
| | 272 <param name="random_state" value="42"/> |
| | 273 <output name="outfile_fit" file="gbr_model01" compare="sim_size" delta="500"/> |
| | 274 </test> |
| | 275 <test> |
| | 276 <param name="infile_model" value="gbr_model01" ftype="zip"/> |
| | 277 <param name="infile_data" value="regression_test_X.tabular" ftype="tabular"/> |
| | 278 <param name="selected_task" value="load"/> |
| | 279 <param name="header" value="True"/> |
| | 280 <output name="outfile_predict" file="gbr_prediction_result01.tabular" compare="sim_size" delta="500"/> |
| | 281 </test> |
| | 282 <test> |
| | 283 <param name="infile1" value="train.tabular" ftype="tabular"/> |
| | 284 <param name="infile2" value="train.tabular" ftype="tabular"/> |
| | 285 <param name="col1" value="1,2,3,4"/> |
| | 286 <param name="col2" value="5"/> |
| | 287 <param name="selected_task" value="train"/> |
| | 288 <param name="selected_algorithm" value="GradientBoostingClassifier"/> |
| | 289 <output name="outfile_fit" file="gbc_model01" compare="sim_size" delta="500"/> |
| | 290 </test> |
| | 291 <test> |
| | 292 <param name="infile_model" value="gbc_model01" ftype="zip"/> |
| | 293 <param name="infile_data" value="test.tabular" ftype="tabular"/> |
| | 294 <param name="selected_task" value="load"/> |
| | 295 <output name="outfile_predict" file="gbc_result01" compare="sim_size" delta="500"/> |
| 180 </test> | 296 </test> |
| 181 </tests> | 297 </tests> |
| 182 <help><