Mercurial > repos > bgruening > sklearn_train_test_eval
comparison train_test_eval.xml @ 8:f2c240cce242 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 208a8d348e7c7a182cfbe1b6f17868146428a7e2"
| author | bgruening |
|---|---|
| date | Tue, 13 Apr 2021 20:47:28 +0000 |
| parents | c9b521fcc3ac |
| children | 0edcdeaad6f4 |
Comparison legend: equal, deleted, inserted, replaced
| 7:c9b521fcc3ac | 8:f2c240cce242 |
|---|---|
| 1 <tool id="sklearn_train_test_eval" name="Train, Test and Evaluation" version="@VERSION@"> | 1 <tool id="sklearn_train_test_eval" name="Train, Test and Evaluation" version="@VERSION@" profile="20.05"> |
| 2 <description>fit a model using part of dataset and evaluate using the rest</description> | 2 <description>fit a model using part of dataset and evaluate using the rest</description> |
| 3 <macros> | 3 <macros> |
| 4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
| 5 <import>keras_macros.xml</import> | 5 <import>keras_macros.xml</import> |
| 6 </macros> | 6 </macros> |
| 7 <expand macro="python_requirements"/> | 7 <expand macro="python_requirements" /> |
| 8 <expand macro="macro_stdio"/> | 8 <expand macro="macro_stdio" /> |
| 9 <version_command>echo "@VERSION@"</version_command> | 9 <version_command>echo "@VERSION@"</version_command> |
| 10 <command detect_errors="aggressive"> | 10 <command detect_errors="aggressive"> |
| 11 <![CDATA[ | 11 <![CDATA[ |
| 12 export HDF5_USE_FILE_LOCKING='FALSE'; | 12 export HDF5_USE_FILE_LOCKING='FALSE'; |
| 13 #if $input_options.selected_input == 'refseq_and_interval' | 13 #if $input_options.selected_input == 'refseq_and_interval' |
| 49 <param name="selected_exp_scheme" type="select" label="Select a scheme"> | 49 <param name="selected_exp_scheme" type="select" label="Select a scheme"> |
| 50 <option value="train_test" selected="true">Train and Test</option> | 50 <option value="train_test" selected="true">Train and Test</option> |
| 51 <option value="train_val_test">Train, Validate and Test</option> | 51 <option value="train_val_test">Train, Validate and Test</option> |
| 52 </param> | 52 </param> |
| 53 <when value="train_test"> | 53 <when value="train_test"> |
| 54 <expand macro="estimator_and_hyperparameter"/> | 54 <expand macro="estimator_and_hyperparameter" /> |
| 55 <section name="test_split" title="Test holdout" expanded="false"> | 55 <section name="test_split" title="Test holdout" expanded="false"> |
| 56 <expand macro="train_test_split_params"> | 56 <expand macro="train_test_split_params"> |
| 57 <expand macro="cv_groups"/> | 57 <expand macro="cv_groups" /> |
| 58 </expand> | 58 </expand> |
| 59 </section> | 59 </section> |
| 60 <section name="metrics" title="Metrics for evaluation" expanded="false"> | 60 <section name="metrics" title="Metrics for evaluation" expanded="false"> |
| 61 <expand macro="scoring_selection"/> | 61 <expand macro="scoring_selection" /> |
| 62 </section> | 62 </section> |
| 63 </when> | 63 </when> |
| 64 <when value="train_val_test"> | 64 <when value="train_val_test"> |
| 65 <expand macro="estimator_and_hyperparameter"/> | 65 <expand macro="estimator_and_hyperparameter" /> |
| 66 <section name="test_split" title="Test holdout" expanded="false"> | 66 <section name="test_split" title="Test holdout" expanded="false"> |
| 67 <expand macro="train_test_split_params"> | 67 <expand macro="train_test_split_params"> |
| 68 <expand macro="cv_groups"/> | 68 <expand macro="cv_groups" /> |
| 69 </expand> | 69 </expand> |
| 70 </section> | 70 </section> |
| 71 <section name="val_split" title="Validation holdout (recommend using the same method for both validation and test)" expanded="false"> | 71 <section name="val_split" title="Validation holdout (recommend using the same method for both validation and test)" expanded="false"> |
| 72 <expand macro="train_test_split_params"/> | 72 <expand macro="train_test_split_params" /> |
| 73 </section> | 73 </section> |
| 74 <section name="metrics" title="Metrics for evaluation" expanded="false"> | 74 <section name="metrics" title="Metrics for evaluation" expanded="false"> |
| 75 <expand macro="scoring_selection"/> | 75 <expand macro="scoring_selection" /> |
| 76 </section> | 76 </section> |
| 77 </when> | 77 </when> |
| 78 </conditional> | 78 </conditional> |
| 79 <expand macro="sl_mixed_input_plus_sequence"/> | 79 <expand macro="sl_mixed_input_plus_sequence" /> |
| 80 <param name="save" type="select" label="Save the fitted model" help="For security reason, deep learning models will be saved into two datasets, model skeleton and weights."> | 80 <param name="save" type="select" label="Save the fitted model" help="For security reason, deep learning models will be saved into two datasets, model skeleton and weights."> |
| 81 <option value="nope" selected="true">Nope, save is unnecessary</option> | 81 <option value="nope" selected="true">Nope, save is unnecessary</option> |
| 82 <option value="save_estimator">Fitted whole estimator (excluding deep learning)</option> | 82 <option value="save_estimator">Fitted whole estimator (excluding deep learning)</option> |
| 83 <option value="save_weights">Model skeleton and weights, for deep learning exclusively</option> | 83 <option value="save_weights">Model skeleton and weights, for deep learning exclusively</option> |
| 84 </param> | 84 </param> |
| 85 </inputs> | 85 </inputs> |
| 86 <outputs> | 86 <outputs> |
| 87 <data format="tabular" name="outfile_result"/> | 87 <data format="tabular" name="outfile_result" /> |
| 88 <data format="zip" name="outfile_object" label="Fitted estimator or estimator skeleton on ${on_string}"> | 88 <data format="zip" name="outfile_object" label="Fitted estimator or estimator skeleton on ${on_string}"> |
| 89 <filter>save != 'nope'</filter> | 89 <filter>save != 'nope'</filter> |
| 90 </data> | 90 </data> |
| 91 <data format="h5" name="outfile_weights" label="Weights trained on ${on_string}"> | 91 <data format="h5" name="outfile_weights" label="Weights trained on ${on_string}"> |
| 92 <filter>save == 'save_weights'</filter> | 92 <filter>save == 'save_weights'</filter> |
| 93 </data> | 93 </data> |
| 94 </outputs> | 94 </outputs> |
| 95 <tests> | 95 <tests> |
| 96 <test> | 96 <test> |
| 97 <conditional name="experiment_schemes"> | 97 <conditional name="experiment_schemes"> |
| 98 <param name="selected_exp_scheme" value="train_val_test"/> | 98 <param name="selected_exp_scheme" value="train_val_test" /> |
| 99 <param name="infile_estimator" value="keras_model04" ftype="zip"/> | 99 <param name="infile_estimator" value="keras_model04" ftype="zip" /> |
| 100 <section name="hyperparams_swapping"> | 100 <section name="hyperparams_swapping"> |
| 101 <param name="infile_params" value="keras_params04.tabular" ftype="tabular"/> | 101 <param name="infile_params" value="keras_params04.tabular" ftype="tabular" /> |
| 102 <repeat name="param_set"> | 102 <repeat name="param_set"> |
| 103 <param name="sp_value" value="999"/> | 103 <param name="sp_value" value="999" /> |
| 104 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed"/> | 104 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed" /> |
| 105 </repeat> | 105 </repeat> |
| 106 <repeat name="param_set"> | 106 <repeat name="param_set"> |
| 107 <param name="sp_value" value="999"/> | 107 <param name="sp_value" value="999" /> |
| 108 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed"/> | 108 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed" /> |
| 109 </repeat> | 109 </repeat> |
| 110 <repeat name="param_set"> | 110 <repeat name="param_set"> |
| 111 <param name="sp_value" value="0.1"/> | 111 <param name="sp_value" value="0.1" /> |
| 112 <param name="sp_name" value="lr"/> | 112 <param name="sp_name" value="lr" /> |
| 113 </repeat> | 113 </repeat> |
| 114 <repeat name="param_set"> | 114 <repeat name="param_set"> |
| 115 <param name="sp_value" value="'adamax'"/> | 115 <param name="sp_value" value="'adamax'" /> |
| 116 <param name="sp_name" value="optimizer"/> | 116 <param name="sp_name" value="optimizer" /> |
| 117 </repeat> | 117 </repeat> |
| 118 </section> | 118 </section> |
| 119 <section name="test_split"> | 119 <section name="test_split"> |
| 120 <conditional name="split_algos"> | 120 <conditional name="split_algos"> |
| 121 <param name="shuffle" value="simple"/> | 121 <param name="shuffle" value="simple" /> |
| 122 <param name="test_size" value="0.2"/> | 122 <param name="test_size" value="0.2" /> |
| 123 <param name="random_state" value="123"/> | 123 <param name="random_state" value="123" /> |
| 124 </conditional> | 124 </conditional> |
| 125 </section> | 125 </section> |
| 126 <section name="val_split"> | 126 <section name="val_split"> |
| 127 <conditional name="split_algos"> | 127 <conditional name="split_algos"> |
| 128 <param name="shuffle" value="simple"/> | 128 <param name="shuffle" value="simple" /> |
| 129 <param name="test_size" value="0.2"/> | 129 <param name="test_size" value="0.2" /> |
| 130 <param name="random_state" value="456"/> | 130 <param name="random_state" value="456" /> |
| 131 </conditional> | 131 </conditional> |
| 132 </section> | 132 </section> |
| 133 <section name="metrics"> | 133 <section name="metrics"> |
| 134 <conditional name="scoring"> | 134 <conditional name="scoring"> |
| 135 <param name="primary_scoring" value="r2"/> | 135 <param name="primary_scoring" value="r2" /> |
| 136 <param name="secondary_scoring" value="neg_mean_absolute_error"/> | 136 <param name="secondary_scoring" value="neg_mean_absolute_error" /> |
| 137 </conditional> | 137 </conditional> |
| 138 </section> | 138 </section> |
| 139 </conditional> | 139 </conditional> |
| 140 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 140 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
| 141 <param name="header1" value="true" /> | 141 <param name="header1" value="true" /> |
| 142 <param name="selected_column_selector_option" value="all_columns"/> | 142 <param name="selected_column_selector_option" value="all_columns" /> |
| 143 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 143 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
| 144 <param name="header2" value="true" /> | 144 <param name="header2" value="true" /> |
| 145 <param name="selected_column_selector_option2" value="all_columns"/> | 145 <param name="selected_column_selector_option2" value="all_columns" /> |
| 146 <param name="save" value="save_weights"/> | 146 <param name="save" value="save_weights" /> |
| 147 <output name="outfile_result"> | 147 <output name="outfile_result"> |
| 148 <assert_contents> | 148 <assert_contents> |
| 149 <has_n_columns n="2"/> | 149 <has_n_columns n="2" /> |
| 150 <has_text text="0.6626"/> | 150 <has_text text="0.6384" /> |
| 151 <has_text text="5.598"/> | 151 <has_text text="-6.072" /> |
| 152 </assert_contents> | 152 </assert_contents> |
| 153 </output> | 153 </output> |
| 154 <output name="outfile_object" file="train_test_eval_model01" compare="sim_size" delta="5"/> | 154 <output name="outfile_object" file="train_test_eval_model01" compare="sim_size" delta="5" /> |
| 155 <output name="outfile_weights" file="train_test_eval_weights01.h5" compare="sim_size" delta="5"/> | 155 <output name="outfile_weights" file="train_test_eval_weights01.h5" compare="sim_size" delta="5" /> |
| 156 </test> | 156 </test> |
| 157 <test> | 157 <test> |
| 158 <conditional name="experiment_schemes"> | 158 <conditional name="experiment_schemes"> |
| 159 <param name="selected_exp_scheme" value="train_val_test"/> | 159 <param name="selected_exp_scheme" value="train_val_test" /> |
| 160 <param name="infile_estimator" value="keras_model04" ftype="zip"/> | 160 <param name="infile_estimator" value="keras_model04" ftype="zip" /> |
| 161 <section name="hyperparams_swapping"> | 161 <section name="hyperparams_swapping"> |
| 162 <param name="infile_params" value="keras_params04.tabular" ftype="tabular"/> | 162 <param name="infile_params" value="keras_params04.tabular" ftype="tabular" /> |
| 163 <repeat name="param_set"> | 163 <repeat name="param_set"> |
| 164 <param name="sp_value" value="999"/> | 164 <param name="sp_value" value="999" /> |
| 165 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed"/> | 165 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed" /> |
| 166 </repeat> | 166 </repeat> |
| 167 <repeat name="param_set"> | 167 <repeat name="param_set"> |
| 168 <param name="sp_value" value="999"/> | 168 <param name="sp_value" value="999" /> |
| 169 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed"/> | 169 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed" /> |
| 170 </repeat> | 170 </repeat> |
| 171 <repeat name="param_set"> | 171 <repeat name="param_set"> |
| 172 <param name="sp_value" value="0.1"/> | 172 <param name="sp_value" value="0.1" /> |
| 173 <param name="sp_name" value="lr"/> | 173 <param name="sp_name" value="lr" /> |
| 174 </repeat> | 174 </repeat> |
| 175 <repeat name="param_set"> | 175 <repeat name="param_set"> |
| 176 <param name="sp_value" value="'adamax'"/> | 176 <param name="sp_value" value="'adamax'" /> |
| 177 <param name="sp_name" value="optimizer"/> | 177 <param name="sp_name" value="optimizer" /> |
| 178 </repeat> | 178 </repeat> |
| 179 </section> | 179 </section> |
| 180 <section name="test_split"> | 180 <section name="test_split"> |
| 181 <conditional name="split_algos"> | 181 <conditional name="split_algos"> |
| 182 <param name="shuffle" value="group"/> | 182 <param name="shuffle" value="group" /> |
| 183 <param name="group_names" value="test"/> | 183 <param name="group_names" value="test" /> |
| 184 <section name="groups_selector"> | 184 <section name="groups_selector"> |
| 185 <param name="infile_g" value="regression_groups.tabular" ftype="tabular"/> | 185 <param name="infile_g" value="regression_groups.tabular" ftype="tabular" /> |
| 186 <param name="header_g" value="true"/> | 186 <param name="header_g" value="true" /> |
| 187 <conditional name="column_selector_options_g"> | 187 <conditional name="column_selector_options_g"> |
| 188 <param name="selected_column_selector_option_g" value="by_index_number"/> | 188 <param name="selected_column_selector_option_g" value="by_index_number" /> |
| 189 <param name="col_g" value="1"/> | 189 <param name="col_g" value="1" /> |
| 190 </conditional> | 190 </conditional> |
| 191 </section> | 191 </section> |
| 192 </conditional> | 192 </conditional> |
| 193 </section> | 193 </section> |
| 194 <section name="val_split"> | 194 <section name="val_split"> |
| 195 <conditional name="split_algos"> | 195 <conditional name="split_algos"> |
| 196 <param name="shuffle" value="group"/> | 196 <param name="shuffle" value="group" /> |
| 197 <param name="group_names" value="validation"/> | 197 <param name="group_names" value="validation" /> |
| 198 </conditional> | 198 </conditional> |
| 199 </section> | 199 </section> |
| 200 <section name="metrics"> | 200 <section name="metrics"> |
| 201 <conditional name="scoring"> | 201 <conditional name="scoring"> |
| 202 <param name="primary_scoring" value="r2"/> | 202 <param name="primary_scoring" value="r2" /> |
| 203 <param name="secondary_scoring" value="neg_mean_absolute_error"/> | 203 <param name="secondary_scoring" value="neg_mean_absolute_error" /> |
| 204 </conditional> | 204 </conditional> |
| 205 </section> | 205 </section> |
| 206 </conditional> | 206 </conditional> |
| 207 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 207 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
| 208 <param name="header1" value="true" /> | 208 <param name="header1" value="true" /> |
| 209 <param name="selected_column_selector_option" value="all_columns"/> | 209 <param name="selected_column_selector_option" value="all_columns" /> |
| 210 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 210 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
| 211 <param name="header2" value="true" /> | 211 <param name="header2" value="true" /> |
| 212 <param name="selected_column_selector_option2" value="all_columns"/> | 212 <param name="selected_column_selector_option2" value="all_columns" /> |
| 213 <param name="save" value="save_weights"/> | 213 <param name="save" value="save_weights" /> |
| 214 <output name="outfile_result" > | 214 <output name="outfile_result"> |
| 215 <assert_contents> | 215 <assert_contents> |
| 216 <has_n_columns n="2"/> | 216 <has_n_columns n="2" /> |
| 217 <has_text text="0.667"/> | 217 <has_text text="0.627" /> |
| 218 <has_text text="-5.586"/> | 218 <has_text text="-6.012" /> |
| 219 </assert_contents> | 219 </assert_contents> |
| 220 </output> | 220 </output> |
| 221 <output name="outfile_weights" file="train_test_eval_weights02.h5" compare="sim_size" delta="5"/> | 221 <output name="outfile_weights" file="train_test_eval_weights02.h5" compare="sim_size" delta="5" /> |
| 222 </test> | 222 </test> |
| 223 <test> | 223 <test> |
| 224 <conditional name="experiment_schemes"> | 224 <conditional name="experiment_schemes"> |
| 225 <param name="selected_exp_scheme" value="train_test"/> | 225 <param name="selected_exp_scheme" value="train_test" /> |
| 226 <param name="infile_estimator" value="pipeline10" ftype="zip"/> | 226 <param name="infile_estimator" value="pipeline10" ftype="zip" /> |
| 227 <section name="hyperparams_swapping"> | 227 <section name="hyperparams_swapping"> |
| 228 <param name="infile_params" value="get_params10.tabular" ftype="tabular"/> | 228 <param name="infile_params" value="get_params10.tabular" ftype="tabular" /> |
| 229 <repeat name="param_set"> | 229 <repeat name="param_set"> |
| 230 <param name="sp_value" value="10"/> | 230 <param name="sp_value" value="10" /> |
| 231 <param name="sp_name" value="adaboostregressor__random_state"/> | 231 <param name="sp_name" value="adaboostregressor__random_state" /> |
| 232 </repeat> | 232 </repeat> |
| 233 <repeat name="param_set"> | 233 <repeat name="param_set"> |
| 234 <param name="sp_value" value=": sklearn_tree.ExtraTreeRegressor(random_state=0)"/> | 234 <param name="sp_value" value=": sklearn_tree.ExtraTreeRegressor(random_state=0)" /> |
| 235 <param name="sp_name" value="adaboostregressor__base_estimator"/> | 235 <param name="sp_name" value="adaboostregressor__base_estimator" /> |
| 236 </repeat> | 236 </repeat> |
| 237 </section> | 237 </section> |
| 238 <section name="test_split"> | 238 <section name="test_split"> |
| 239 <conditional name="split_algos"> | 239 <conditional name="split_algos"> |
| 240 <param name="shuffle" value="simple"/> | 240 <param name="shuffle" value="simple" /> |
| 241 <param name="test_size" value="0.2"/> | 241 <param name="test_size" value="0.2" /> |
| 242 <param name="random_state" value="123"/> | 242 <param name="random_state" value="123" /> |
| 243 </conditional> | 243 </conditional> |
| 244 </section> | 244 </section> |
| 245 <section name="val_split"> | 245 <section name="val_split"> |
| 246 <conditional name="split_algos"> | 246 <conditional name="split_algos"> |
| 247 <param name="shuffle" value="simple"/> | 247 <param name="shuffle" value="simple" /> |
| 248 <param name="test_size" value="0.2"/> | 248 <param name="test_size" value="0.2" /> |
| 249 <param name="random_state" value="456"/> | 249 <param name="random_state" value="456" /> |
| 250 </conditional> | 250 </conditional> |
| 251 </section> | 251 </section> |
| 252 <section name="metrics"> | 252 <section name="metrics"> |
| 253 <conditional name="scoring"> | 253 <conditional name="scoring"> |
| 254 <param name="primary_scoring" value="r2"/> | 254 <param name="primary_scoring" value="r2" /> |
| 255 <param name="secondary_scoring" value="neg_mean_absolute_error"/> | 255 <param name="secondary_scoring" value="neg_mean_absolute_error" /> |
| 256 </conditional> | 256 </conditional> |
| 257 </section> | 257 </section> |
| 258 </conditional> | 258 </conditional> |
| 259 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 259 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
| 260 <param name="header1" value="true" /> | 260 <param name="header1" value="true" /> |
| 261 <param name="selected_column_selector_option" value="all_columns"/> | 261 <param name="selected_column_selector_option" value="all_columns" /> |
| 262 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 262 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
| 263 <param name="header2" value="true" /> | 263 <param name="header2" value="true" /> |
| 264 <param name="selected_column_selector_option2" value="all_columns"/> | 264 <param name="selected_column_selector_option2" value="all_columns" /> |
| 265 <param name="save" value="nope"/> | 265 <param name="save" value="nope" /> |
| 266 <output name="outfile_result" file="train_test_eval03.tabular"/> | 266 <output name="outfile_result" file="train_test_eval03.tabular" /> |
| 267 </test> | 267 </test> |
| 268 </tests> | 268 </tests> |
| 269 <help> | 269 <help> |
| 270 <![CDATA[ | 270 <![CDATA[ |
| 271 **What it does** | 271 **What it does** |
| 281 Performance scores. | 281 Performance scores. |
| 282 | 282 |
| 283 ]]> | 283 ]]> |
| 284 </help> | 284 </help> |
| 285 <expand macro="sklearn_citation"> | 285 <expand macro="sklearn_citation"> |
| 286 <expand macro="skrebate_citation"/> | 286 <expand macro="skrebate_citation" /> |
| 287 <expand macro="xgboost_citation"/> | 287 <expand macro="xgboost_citation" /> |
| 288 <expand macro="keras_citation"/> | 288 <expand macro="keras_citation" /> |
| 289 </expand> | 289 </expand> |
| 290 </tool> | 290 </tool> |
