Mercurial > repos > bgruening > stacking_ensemble_models
comparison stacking_ensembles.xml @ 3:0a1812986bc3 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
| author | bgruening |
|---|---|
| date | Wed, 09 Aug 2023 11:10:37 +0000 |
| parents | 38c4f8a98038 |
| children |
comparison
equal
deleted
inserted
replaced
| 2:38c4f8a98038 | 3:0a1812986bc3 |
|---|---|
| 1 <tool id="stacking_ensemble_models" name="Stacking Ensembles" version="@VERSION@"> | 1 <tool id="stacking_ensemble_models" name="Stacking Ensembles" version="@VERSION@" profile="@PROFILE@"> |
| 2 <description>builds stacking, voting ensemble models with numerous base options</description> | 2 <description>builds stacking, voting ensemble models with numerous base options</description> |
| 3 <macros> | 3 <macros> |
| 4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
| 5 <macro name="stacking_ensemble_inputs"> | |
| 6 <section name="options" title="Advanced Options" expanded="false"> | |
| 7 <yield /> | |
| 8 <param argument="use_features_in_secondary" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" /> | |
| 9 <param argument="store_train_meta_features" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" /> | |
| 10 </section> | |
| 11 </macro> | |
| 12 <macro name="stacking_base_estimator"> | |
| 13 <conditional name="estimator_selector"> | |
| 14 <param name="selected_module" type="select" label="Choose the module that contains target estimator:" > | |
| 15 <expand macro="estimator_module_options"> | |
| 16 <option value="custom_estimator">Load a custom estimator</option> | |
| 17 </expand> | |
| 18 </param> | |
| 19 <expand macro="estimator_suboptions"> | |
| 20 <when value="custom_estimator"> | |
| 21 <param name="c_estimator" type="data" format="h5mlm" label="Choose the dataset containing the custom estimator or pipeline" /> | |
| 22 </when> | |
| 23 </expand> | |
| 24 </conditional> | |
| 25 </macro> | |
| 26 <macro name="stacking_voting_weights"> | |
| 27 <section name="options" title="Advanced Options" expanded="false"> | |
| 28 <param argument="weights" type="text" value="[]" optional="true" help="Sequence of weights (float or int). Uses uniform weights if None (`[]`)."> | |
| 29 <sanitizer> | |
| 30 <valid initial="default"> | |
| 31 <add value="[" /> | |
| 32 <add value="]" /> | |
| 33 </valid> | |
| 34 </sanitizer> | |
| 35 </param> | |
| 36 <yield /> | |
| 37 </section> | |
| 38 </macro> | |
| 5 </macros> | 39 </macros> |
| 6 <expand macro="python_requirements"/> | 40 <expand macro="python_requirements" /> |
| 7 <expand macro="macro_stdio"/> | 41 <expand macro="macro_stdio" /> |
| 8 <version_command>echo "$ENSEMBLE_VERSION"</version_command> | 42 <version_command>echo "$ENSEMBLE_VERSION"</version_command> |
| 9 <command> | 43 <command> |
| 10 <![CDATA[ | 44 <![CDATA[ |
| 11 #for $i, $base in enumerate($base_est_builder) | 45 #for $i, $base in enumerate($base_est_builder) |
| 12 #if $i == 0 | 46 #if $i == 0 |
| 28 #if $algo_selection.estimator_type not in ('sklearn.ensemble_VotingClassifier', 'sklearn.ensemble_VotingRegressor') | 62 #if $algo_selection.estimator_type not in ('sklearn.ensemble_VotingClassifier', 'sklearn.ensemble_VotingRegressor') |
| 29 #if $algo_selection.meta_estimator.estimator_selector.selected_module == 'custom_estimator' | 63 #if $algo_selection.meta_estimator.estimator_selector.selected_module == 'custom_estimator' |
| 30 --meta '${algo_selection.meta_estimator.estimator_selector.c_estimator}' | 64 --meta '${algo_selection.meta_estimator.estimator_selector.c_estimator}' |
| 31 #end if | 65 #end if |
| 32 #end if | 66 #end if |
| 33 #if $get_params | |
| 34 --outfile_params '$outfile_params' | |
| 35 #end if | |
| 36 ]]> | 67 ]]> |
| 37 </command> | 68 </command> |
| 38 <configfiles> | 69 <configfiles> |
| 39 <inputs name="inputs" /> | 70 <inputs name="inputs" /> |
| 40 </configfiles> | 71 </configfiles> |
| 52 <expand macro="stacking_voting_weights"> | 83 <expand macro="stacking_voting_weights"> |
| 53 <param argument="voting" type="select" help="If ‘hard’, uses predicted class labels for majority rule voting. Else if ‘soft’, predicts the class label based on the argmax of the sums of the predicted probabilities, which is recommended for an ensemble of well-calibrated classifiers."> | 84 <param argument="voting" type="select" help="If ‘hard’, uses predicted class labels for majority rule voting. Else if ‘soft’, predicts the class label based on the argmax of the sums of the predicted probabilities, which is recommended for an ensemble of well-calibrated classifiers."> |
| 54 <option value="hard" selected="true">hard</option> | 85 <option value="hard" selected="true">hard</option> |
| 55 <option value="soft">soft</option> | 86 <option value="soft">soft</option> |
| 56 </param> | 87 </param> |
| 57 <param argument="flatten_transform" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" help=""/> | 88 <param argument="flatten_transform" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" help="" /> |
| 58 </expand> | 89 </expand> |
| 59 </when> | 90 </when> |
| 60 <when value="sklearn.ensemble_VotingRegressor"> | 91 <when value="sklearn.ensemble_VotingRegressor"> |
| 61 <expand macro="stacking_voting_weights"/> | 92 <expand macro="stacking_voting_weights" /> |
| 62 </when> | 93 </when> |
| 63 <when value="mlxtend.classifier_StackingCVClassifier"> | 94 <when value="mlxtend.classifier_StackingCVClassifier"> |
| 64 <expand macro="stacking_ensemble_inputs"> | 95 <expand macro="stacking_ensemble_inputs"> |
| 65 <expand macro="cv_reduced"/> | 96 <expand macro="cv_reduced" /> |
| 66 <expand macro="shuffle" label="shuffle"/> | 97 <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" /> |
| 67 <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data."/> | 98 <param argument="drop_last_proba" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" /> |
| 68 <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/> | 99 </expand> |
| 69 </expand> | 100 <section name="meta_estimator" title="Meta Estimator" expanded="true"> |
| 70 <section name="meta_estimator" title="Meta Estimator" expanded="true"> | 101 <expand macro="stacking_base_estimator" /> |
| 71 <expand macro="stacking_base_estimator"/> | |
| 72 </section> | 102 </section> |
| 73 </when> | 103 </when> |
| 74 <when value="mlxtend.classifier_StackingClassifier"> | 104 <when value="mlxtend.classifier_StackingClassifier"> |
| 75 <expand macro="stacking_ensemble_inputs"> | 105 <expand macro="stacking_ensemble_inputs"> |
| 76 <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/> | 106 <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" /> |
| 77 <param argument="average_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/> | 107 <param argument="average_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" /> |
| 78 </expand> | 108 <param argument="drop_last_proba" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" /> |
| 79 <section name="meta_estimator" title="Meta Estimator" expanded="true"> | 109 </expand> |
| 80 <expand macro="stacking_base_estimator"/> | 110 <section name="meta_estimator" title="Meta Estimator" expanded="true"> |
| 111 <expand macro="stacking_base_estimator" /> | |
| 81 </section> | 112 </section> |
| 82 </when> | 113 </when> |
| 83 <when value="mlxtend.regressor_StackingCVRegressor"> | 114 <when value="mlxtend.regressor_StackingCVRegressor"> |
| 84 <expand macro="stacking_ensemble_inputs"> | 115 <expand macro="stacking_ensemble_inputs"> |
| 85 <expand macro="cv_reduced"/> | 116 <expand macro="cv_reduced" /> |
| 86 <!--TODO support group splitters. Hint: `groups` is a fit_param--> | 117 <!--TODO support group splitters. Hint: `groups` is a fit_param--> |
| 87 <expand macro="shuffle" label="shuffle"/> | 118 <expand macro="shuffle" label="shuffle" /> |
| 88 <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data."/> | 119 <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data." /> |
| 89 <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true"/> | 120 <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" /> |
| 90 </expand> | 121 </expand> |
| 91 <section name="meta_estimator" title="Meta Estimator" expanded="true"> | 122 <section name="meta_estimator" title="Meta Estimator" expanded="true"> |
| 92 <expand macro="stacking_base_estimator"/> | 123 <expand macro="stacking_base_estimator" /> |
| 93 </section> | 124 </section> |
| 94 </when> | 125 </when> |
| 95 <when value="mlxtend.regressor_StackingRegressor"> | 126 <when value="mlxtend.regressor_StackingRegressor"> |
| 96 <expand macro="stacking_ensemble_inputs"> | 127 <expand macro="stacking_ensemble_inputs"> |
| 97 <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true"/> | 128 <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" /> |
| 98 </expand> | 129 </expand> |
| 99 <section name="meta_estimator" title="Meta Estimator" expanded="true"> | 130 <section name="meta_estimator" title="Meta Estimator" expanded="true"> |
| 100 <expand macro="stacking_base_estimator"/> | 131 <expand macro="stacking_base_estimator" /> |
| 101 </section> | 132 </section> |
| 102 </when> | 133 </when> |
| 103 </conditional> | 134 </conditional> |
| 104 <repeat name="base_est_builder" min="1" max="20" title="Base Estimator"> | 135 <repeat name="base_est_builder" min="1" max="20" title="Base Estimator"> |
| 105 <expand macro="stacking_base_estimator"/> | 136 <expand macro="stacking_base_estimator" /> |
| 106 <!--param name="base_estimator" type="data" format="zip,json" label="Select the dataset containing base estimator" help="One estimator at a time."/--> | |
| 107 </repeat> | 137 </repeat> |
| 108 <!--param name="meta_estimator" type="data" format="zip,json" label="Select the dataset containing the Meta estimator"/--> | |
| 109 <param name="get_params" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Output parameters for searchCV?" | |
| 110 help="Optional. Tunable parameters could be obtained through `estimator_attributes` tool."/> | |
| 111 </inputs> | 138 </inputs> |
| 112 <outputs> | 139 <outputs> |
| 113 <data format="zip" name="outfile" label="${algo_selection.estimator_type} on ${on_string}"/> | 140 <data format="h5mlm" name="outfile" label="${algo_selection.estimator_type} on ${on_string}" /> |
| 114 <data format="tabular" name="outfile_params" label="get_params for ${algo_selection.estimator_type}"> | |
| 115 <filter>get_params</filter> | |
| 116 </data> | |
| 117 </outputs> | 141 </outputs> |
| 118 <tests> | 142 <tests> |
| 119 <test> | 143 <test> |
| 120 <conditional name="algo_selection"> | 144 <conditional name="algo_selection"> |
| 121 <param name="estimator_type" value="sklearn.ensemble_VotingClassifier"/> | 145 <param name="estimator_type" value="sklearn.ensemble_VotingClassifier" /> |
| 122 <section name="options"> | 146 <section name="options"> |
| 123 <param name="weights" value="[1, 2]"/> | 147 <param name="weights" value="[1, 2]" /> |
| 124 </section> | 148 </section> |
| 125 </conditional> | 149 </conditional> |
| 126 <repeat name="base_est_builder"> | 150 <repeat name="base_est_builder"> |
| 127 <conditional name="estimator_selector"> | 151 <conditional name="estimator_selector"> |
| 128 <param name="selected_module" value="svm"/> | 152 <param name="selected_module" value="svm" /> |
| 129 <param name="selected_estimator" value="SVC"/> | 153 <param name="selected_estimator" value="SVC" /> |
| 130 </conditional> | 154 </conditional> |
| 131 </repeat> | 155 </repeat> |
| 132 <repeat name="base_est_builder"> | 156 <repeat name="base_est_builder"> |
| 133 <conditional name="estimator_selector"> | 157 <conditional name="estimator_selector"> |
| 134 <param name="selected_module" value="xgboost"/> | 158 <param name="selected_module" value="xgboost" /> |
| 135 <param name="selected_estimator" value="XGBClassifier"/> | 159 <param name="selected_estimator" value="XGBClassifier" /> |
| 136 </conditional> | 160 </conditional> |
| 137 </repeat> | 161 </repeat> |
| 138 <param name="get_params" value="false"/> | 162 <output name="outfile" file="StackingVoting03.h5mlm" compare="sim_size" delta="5" /> |
| 139 <output name="outfile" file="StackingVoting03.zip" compare="sim_size" delta="5"/> | |
| 140 </test> | 163 </test> |
| 141 <test> | 164 <test> |
| 142 <conditional name="algo_selection"> | 165 <conditional name="algo_selection"> |
| 143 <param name="estimator_type" value="mlxtend.regressor_StackingCVRegressor"/> | 166 <param name="estimator_type" value="mlxtend.regressor_StackingCVRegressor" /> |
| 144 <section name="meta_estimator"> | 167 <section name="meta_estimator"> |
| 145 <conditional name="estimator_selector"> | 168 <conditional name="estimator_selector"> |
| 146 <param name="selected_module" value="custom_estimator"/> | 169 <param name="selected_module" value="custom_estimator" /> |
| 147 <param name="c_estimator" value="LinearRegression01.zip" ftype="zip"/> | 170 <param name="c_estimator" value="LinearRegression01.h5mlm" ftype="h5mlm" /> |
| 148 </conditional> | 171 </conditional> |
| 149 </section> | 172 </section> |
| 150 </conditional> | 173 </conditional> |
| 151 <repeat name="base_est_builder"> | 174 <repeat name="base_est_builder"> |
| 152 <conditional name="estimator_selector"> | 175 <conditional name="estimator_selector"> |
| 153 <param name="selected_module" value="custom_estimator"/> | 176 <param name="selected_module" value="custom_estimator" /> |
| 154 <param name="c_estimator" value="RandomForestRegressor01.zip" ftype="zip"/> | 177 <param name="c_estimator" value="RandomForestRegressor01.h5mlm" ftype="h5mlm" /> |
| 155 </conditional> | 178 </conditional> |
| 156 </repeat> | 179 </repeat> |
| 157 <repeat name="base_est_builder"> | 180 <repeat name="base_est_builder"> |
| 158 <conditional name="estimator_selector"> | 181 <conditional name="estimator_selector"> |
| 159 <param name="selected_module" value="custom_estimator"/> | 182 <param name="selected_module" value="custom_estimator" /> |
| 160 <param name="c_estimator" value="XGBRegressor01.zip" ftype="zip"/> | 183 <param name="c_estimator" value="XGBRegressor01.h5mlm" ftype="h5mlm" /> |
| 161 </conditional> | 184 </conditional> |
| 162 </repeat> | 185 </repeat> |
| 163 <param name="get_params" value="false"/> | 186 <output name="outfile" file="StackingCVRegressor01.h5mlm" compare="sim_size" delta="5" /> |
| 164 <output name="outfile" file="StackingCVRegressor01.zip" compare="sim_size" delta="5"/> | |
| 165 </test> | 187 </test> |
| 166 <test> | 188 <test> |
| 167 <conditional name="algo_selection"> | 189 <conditional name="algo_selection"> |
| 168 <param name="estimator_type" value="mlxtend.regressor_StackingRegressor"/> | 190 <param name="estimator_type" value="mlxtend.regressor_StackingRegressor" /> |
| 169 <section name="meta_estimator"> | 191 <section name="meta_estimator"> |
| 170 <conditional name="estimator_selector"> | 192 <conditional name="estimator_selector"> |
| 171 <param name="selected_module" value="svm"/> | 193 <param name="selected_module" value="svm" /> |
| 172 <param name="selected_estimator" value="SVR"/> | 194 <param name="selected_estimator" value="SVR" /> |
| 173 </conditional> | 195 </conditional> |
| 174 </section> | 196 </section> |
| 175 </conditional> | 197 </conditional> |
| 176 <repeat name="base_est_builder"> | 198 <repeat name="base_est_builder"> |
| 177 <conditional name="estimator_selector"> | 199 <conditional name="estimator_selector"> |
| 178 <param name="selected_module" value="custom_estimator"/> | 200 <param name="selected_module" value="custom_estimator" /> |
| 179 <param name="c_estimator" value="RandomForestRegressor01.zip" ftype="zip"/> | 201 <param name="c_estimator" value="RandomForestRegressor01.h5mlm" ftype="h5mlm" /> |
| 180 </conditional> | 202 </conditional> |
| 181 </repeat> | 203 </repeat> |
| 182 <repeat name="base_est_builder"> | 204 <repeat name="base_est_builder"> |
| 183 <conditional name="estimator_selector"> | 205 <conditional name="estimator_selector"> |
| 184 <param name="selected_module" value="xgboost"/> | 206 <param name="selected_module" value="xgboost" /> |
| 185 <param name="selected_estimator" value="XGBRegressor"/> | 207 <param name="selected_estimator" value="XGBRegressor" /> |
| 186 </conditional> | 208 </conditional> |
| 187 </repeat> | 209 </repeat> |
| 188 <param name="get_params" value="false"/> | 210 <output name="outfile" file="StackingRegressor02.h5mlm" compare="sim_size" delta="5" /> |
| 189 <output name="outfile" file="StackingRegressor02.zip" compare="sim_size" delta="5"/> | |
| 190 </test> | 211 </test> |
| 191 </tests> | 212 </tests> |
| 192 <help> | 213 <help> |
| 193 <![CDATA[ | 214 <![CDATA[ |
| 194 This tool wraps Stacking Regression, also called Super Learning, in which different base algorithms train | 215 This tool wraps Stacking Regression, also called Super Learning, in which different base algorithms train |
| 195 on the original dataset and predict results respectively; a second level of `metalearner` then fits on the previous | 216 on the original dataset and predict results respectively; a second level of `metalearner` then fits on the previous |
| 196 prediction results to ensemble a strong learner. | 217 prediction results to ensemble a strong learner. |
| 197 Refer to `http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction`_. | 218 Refer to https://h2o-release.s3.amazonaws.com/h2o/rel-ueno/2/docs-website/h2o-docs/data-science/stacked-ensembles.html. |
| 198 | 219 |
| 199 .. _`http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction`: | |
| 200 http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction | |
| 201 | 220 |
| 202 ]]> | 221 ]]> |
| 203 </help> | 222 </help> |
| 204 <expand macro="sklearn_citation"> | 223 <expand macro="sklearn_citation"> |
| 205 <expand macro="skrebate_citation"/> | 224 <expand macro="skrebate_citation" /> |
| 206 <expand macro="xgboost_citation"/> | 225 <expand macro="xgboost_citation" /> |
| 207 <expand macro="imblearn_citation"/> | 226 <expand macro="imblearn_citation" /> |
| 208 <citation type="bibtex"> | 227 <citation type="bibtex"> |
| 209 @article{raschkas_2018_mlxtend, | 228 @article{raschkas_2018_mlxtend, |
| 210 author = {Sebastian Raschka}, | 229 author = {Sebastian Raschka}, |
| 211 title = {MLxtend: Providing machine learning and data science | 230 title = {MLxtend: Providing machine learning and data science |
| 212 utilities and extensions to Python’s | 231 utilities and extensions to Python’s |
