comparison stacking_ensembles.xml @ 3:0a1812986bc3 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author bgruening
date Wed, 09 Aug 2023 11:10:37 +0000
parents 38c4f8a98038
children
comparison
equal deleted inserted replaced
2:38c4f8a98038 3:0a1812986bc3
1 <tool id="stacking_ensemble_models" name="Stacking Ensembles" version="@VERSION@"> 1 <tool id="stacking_ensemble_models" name="Stacking Ensembles" version="@VERSION@" profile="@PROFILE@">
2 <description>builds stacking, voting ensemble models with numerous base options</description> 2 <description>builds stacking, voting ensemble models with numerous base options</description>
3 <macros> 3 <macros>
4 <import>main_macros.xml</import> 4 <import>main_macros.xml</import>
5 <macro name="stacking_ensemble_inputs">
6 <section name="options" title="Advanced Options" expanded="false">
7 <yield />
8 <param argument="use_features_in_secondary" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" />
9 <param argument="store_train_meta_features" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" />
10 </section>
11 </macro>
12 <macro name="stacking_base_estimator">
13 <conditional name="estimator_selector">
14 <param name="selected_module" type="select" label="Choose the module that contains target estimator:" >
15 <expand macro="estimator_module_options">
16 <option value="custom_estimator">Load a custom estimator</option>
17 </expand>
18 </param>
19 <expand macro="estimator_suboptions">
20 <when value="custom_estimator">
21 <param name="c_estimator" type="data" format="h5mlm" label="Choose the dataset containing the custom estimator or pipeline" />
22 </when>
23 </expand>
24 </conditional>
25 </macro>
26 <macro name="stacking_voting_weights">
27 <section name="options" title="Advanced Options" expanded="false">
28 <param argument="weights" type="text" value="[]" optional="true" help="Sequence of weights (float or int). Uses uniform weights if None (`[]`).">
29 <sanitizer>
30 <valid initial="default">
31 <add value="[" />
32 <add value="]" />
33 </valid>
34 </sanitizer>
35 </param>
36 <yield />
37 </section>
38 </macro>
5 </macros> 39 </macros>
6 <expand macro="python_requirements"/> 40 <expand macro="python_requirements" />
7 <expand macro="macro_stdio"/> 41 <expand macro="macro_stdio" />
8 <version_command>echo "$ENSEMBLE_VERSION"</version_command> 42 <version_command>echo "$ENSEMBLE_VERSION"</version_command>
9 <command> 43 <command>
10 <![CDATA[ 44 <![CDATA[
11 #for $i, $base in enumerate($base_est_builder) 45 #for $i, $base in enumerate($base_est_builder)
12 #if $i == 0 46 #if $i == 0
28 #if $algo_selection.estimator_type not in ('sklearn.ensemble_VotingClassifier', 'sklearn.ensemble_VotingRegressor') 62 #if $algo_selection.estimator_type not in ('sklearn.ensemble_VotingClassifier', 'sklearn.ensemble_VotingRegressor')
29 #if $algo_selection.meta_estimator.estimator_selector.selected_module == 'custom_estimator' 63 #if $algo_selection.meta_estimator.estimator_selector.selected_module == 'custom_estimator'
30 --meta '${algo_selection.meta_estimator.estimator_selector.c_estimator}' 64 --meta '${algo_selection.meta_estimator.estimator_selector.c_estimator}'
31 #end if 65 #end if
32 #end if 66 #end if
33 #if $get_params
34 --outfile_params '$outfile_params'
35 #end if
36 ]]> 67 ]]>
37 </command> 68 </command>
38 <configfiles> 69 <configfiles>
39 <inputs name="inputs" /> 70 <inputs name="inputs" />
40 </configfiles> 71 </configfiles>
52 <expand macro="stacking_voting_weights"> 83 <expand macro="stacking_voting_weights">
53 <param argument="voting" type="select" help="If ‘hard’, uses predicted class labels for majority rule voting. Else if ‘soft’, predicts the class label based on the argmax of the sums of the predicted probabilities, which is recommended for an ensemble of well-calibrated classifiers."> 84 <param argument="voting" type="select" help="If ‘hard’, uses predicted class labels for majority rule voting. Else if ‘soft’, predicts the class label based on the argmax of the sums of the predicted probabilities, which is recommended for an ensemble of well-calibrated classifiers.">
54 <option value="hard" selected="true">hard</option> 85 <option value="hard" selected="true">hard</option>
55 <option value="soft">soft</option> 86 <option value="soft">soft</option>
56 </param> 87 </param>
57 <param argument="flatten_transform" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" help=""/> 88 <param argument="flatten_transform" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" help="" />
58 </expand> 89 </expand>
59 </when> 90 </when>
60 <when value="sklearn.ensemble_VotingRegressor"> 91 <when value="sklearn.ensemble_VotingRegressor">
61 <expand macro="stacking_voting_weights"/> 92 <expand macro="stacking_voting_weights" />
62 </when> 93 </when>
63 <when value="mlxtend.classifier_StackingCVClassifier"> 94 <when value="mlxtend.classifier_StackingCVClassifier">
64 <expand macro="stacking_ensemble_inputs"> 95 <expand macro="stacking_ensemble_inputs">
65 <expand macro="cv_reduced"/> 96 <expand macro="cv_reduced" />
66 <expand macro="shuffle" label="shuffle"/> 97 <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" />
67 <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data."/> 98 <param argument="drop_last_proba" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" />
68 <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/> 99 </expand>
69 </expand> 100 <section name="meta_estimator" title="Meta Estimator" expanded="true">
70 <section name="meta_estimator" title="Meta Estimator" expanded="true"> 101 <expand macro="stacking_base_estimator" />
71 <expand macro="stacking_base_estimator"/>
72 </section> 102 </section>
73 </when> 103 </when>
74 <when value="mlxtend.classifier_StackingClassifier"> 104 <when value="mlxtend.classifier_StackingClassifier">
75 <expand macro="stacking_ensemble_inputs"> 105 <expand macro="stacking_ensemble_inputs">
76 <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/> 106 <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" />
77 <param argument="average_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/> 107 <param argument="average_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" />
78 </expand> 108 <param argument="drop_last_proba" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" />
79 <section name="meta_estimator" title="Meta Estimator" expanded="true"> 109 </expand>
80 <expand macro="stacking_base_estimator"/> 110 <section name="meta_estimator" title="Meta Estimator" expanded="true">
111 <expand macro="stacking_base_estimator" />
81 </section> 112 </section>
82 </when> 113 </when>
83 <when value="mlxtend.regressor_StackingCVRegressor"> 114 <when value="mlxtend.regressor_StackingCVRegressor">
84 <expand macro="stacking_ensemble_inputs"> 115 <expand macro="stacking_ensemble_inputs">
85 <expand macro="cv_reduced"/> 116 <expand macro="cv_reduced" />
86 <!--TODO support group splitters. Hint: `groups` is a fit_param--> 117 <!--TODO support group splitters. Hint: `groups` is a fit_param-->
87 <expand macro="shuffle" label="shuffle"/> 118 <expand macro="shuffle" label="shuffle" />
88 <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data."/> 119 <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data." />
89 <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true"/> 120 <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" />
90 </expand> 121 </expand>
91 <section name="meta_estimator" title="Meta Estimator" expanded="true"> 122 <section name="meta_estimator" title="Meta Estimator" expanded="true">
92 <expand macro="stacking_base_estimator"/> 123 <expand macro="stacking_base_estimator" />
93 </section> 124 </section>
94 </when> 125 </when>
95 <when value="mlxtend.regressor_StackingRegressor"> 126 <when value="mlxtend.regressor_StackingRegressor">
96 <expand macro="stacking_ensemble_inputs"> 127 <expand macro="stacking_ensemble_inputs">
97 <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true"/> 128 <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" />
98 </expand> 129 </expand>
99 <section name="meta_estimator" title="Meta Estimator" expanded="true"> 130 <section name="meta_estimator" title="Meta Estimator" expanded="true">
100 <expand macro="stacking_base_estimator"/> 131 <expand macro="stacking_base_estimator" />
101 </section> 132 </section>
102 </when> 133 </when>
103 </conditional> 134 </conditional>
104 <repeat name="base_est_builder" min="1" max="20" title="Base Estimator"> 135 <repeat name="base_est_builder" min="1" max="20" title="Base Estimator">
105 <expand macro="stacking_base_estimator"/> 136 <expand macro="stacking_base_estimator" />
106 <!--param name="base_estimator" type="data" format="zip,json" label="Select the dataset containing base estimator" help="One estimator at a time."/-->
107 </repeat> 137 </repeat>
108 <!--param name="meta_estimator" type="data" format="zip,json" label="Select the dataset containing the Meta estimator"/-->
109 <param name="get_params" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Output parameters for searchCV?"
110 help="Optional. Tunable parameters could be obtained through `estimator_attributes` tool."/>
111 </inputs> 138 </inputs>
112 <outputs> 139 <outputs>
113 <data format="zip" name="outfile" label="${algo_selection.estimator_type} on ${on_string}"/> 140 <data format="h5mlm" name="outfile" label="${algo_selection.estimator_type} on ${on_string}" />
114 <data format="tabular" name="outfile_params" label="get_params for ${algo_selection.estimator_type}">
115 <filter>get_params</filter>
116 </data>
117 </outputs> 141 </outputs>
118 <tests> 142 <tests>
119 <test> 143 <test>
120 <conditional name="algo_selection"> 144 <conditional name="algo_selection">
121 <param name="estimator_type" value="sklearn.ensemble_VotingClassifier"/> 145 <param name="estimator_type" value="sklearn.ensemble_VotingClassifier" />
122 <section name="options"> 146 <section name="options">
123 <param name="weights" value="[1, 2]"/> 147 <param name="weights" value="[1, 2]" />
124 </section> 148 </section>
125 </conditional> 149 </conditional>
126 <repeat name="base_est_builder"> 150 <repeat name="base_est_builder">
127 <conditional name="estimator_selector"> 151 <conditional name="estimator_selector">
128 <param name="selected_module" value="svm"/> 152 <param name="selected_module" value="svm" />
129 <param name="selected_estimator" value="SVC"/> 153 <param name="selected_estimator" value="SVC" />
130 </conditional> 154 </conditional>
131 </repeat> 155 </repeat>
132 <repeat name="base_est_builder"> 156 <repeat name="base_est_builder">
133 <conditional name="estimator_selector"> 157 <conditional name="estimator_selector">
134 <param name="selected_module" value="xgboost"/> 158 <param name="selected_module" value="xgboost" />
135 <param name="selected_estimator" value="XGBClassifier"/> 159 <param name="selected_estimator" value="XGBClassifier" />
136 </conditional> 160 </conditional>
137 </repeat> 161 </repeat>
138 <param name="get_params" value="false"/> 162 <output name="outfile" file="StackingVoting03.h5mlm" compare="sim_size" delta="5" />
139 <output name="outfile" file="StackingVoting03.zip" compare="sim_size" delta="5"/>
140 </test> 163 </test>
141 <test> 164 <test>
142 <conditional name="algo_selection"> 165 <conditional name="algo_selection">
143 <param name="estimator_type" value="mlxtend.regressor_StackingCVRegressor"/> 166 <param name="estimator_type" value="mlxtend.regressor_StackingCVRegressor" />
144 <section name="meta_estimator"> 167 <section name="meta_estimator">
145 <conditional name="estimator_selector"> 168 <conditional name="estimator_selector">
146 <param name="selected_module" value="custom_estimator"/> 169 <param name="selected_module" value="custom_estimator" />
147 <param name="c_estimator" value="LinearRegression01.zip" ftype="zip"/> 170 <param name="c_estimator" value="LinearRegression01.h5mlm" ftype="h5mlm" />
148 </conditional> 171 </conditional>
149 </section> 172 </section>
150 </conditional> 173 </conditional>
151 <repeat name="base_est_builder"> 174 <repeat name="base_est_builder">
152 <conditional name="estimator_selector"> 175 <conditional name="estimator_selector">
153 <param name="selected_module" value="custom_estimator"/> 176 <param name="selected_module" value="custom_estimator" />
154 <param name="c_estimator" value="RandomForestRegressor01.zip" ftype="zip"/> 177 <param name="c_estimator" value="RandomForestRegressor01.h5mlm" ftype="h5mlm" />
155 </conditional> 178 </conditional>
156 </repeat> 179 </repeat>
157 <repeat name="base_est_builder"> 180 <repeat name="base_est_builder">
158 <conditional name="estimator_selector"> 181 <conditional name="estimator_selector">
159 <param name="selected_module" value="custom_estimator"/> 182 <param name="selected_module" value="custom_estimator" />
160 <param name="c_estimator" value="XGBRegressor01.zip" ftype="zip"/> 183 <param name="c_estimator" value="XGBRegressor01.h5mlm" ftype="h5mlm" />
161 </conditional> 184 </conditional>
162 </repeat> 185 </repeat>
163 <param name="get_params" value="false"/> 186 <output name="outfile" file="StackingCVRegressor01.h5mlm" compare="sim_size" delta="5" />
164 <output name="outfile" file="StackingCVRegressor01.zip" compare="sim_size" delta="5"/>
165 </test> 187 </test>
166 <test> 188 <test>
167 <conditional name="algo_selection"> 189 <conditional name="algo_selection">
168 <param name="estimator_type" value="mlxtend.regressor_StackingRegressor"/> 190 <param name="estimator_type" value="mlxtend.regressor_StackingRegressor" />
169 <section name="meta_estimator"> 191 <section name="meta_estimator">
170 <conditional name="estimator_selector"> 192 <conditional name="estimator_selector">
171 <param name="selected_module" value="svm"/> 193 <param name="selected_module" value="svm" />
172 <param name="selected_estimator" value="SVR"/> 194 <param name="selected_estimator" value="SVR" />
173 </conditional> 195 </conditional>
174 </section> 196 </section>
175 </conditional> 197 </conditional>
176 <repeat name="base_est_builder"> 198 <repeat name="base_est_builder">
177 <conditional name="estimator_selector"> 199 <conditional name="estimator_selector">
178 <param name="selected_module" value="custom_estimator"/> 200 <param name="selected_module" value="custom_estimator" />
179 <param name="c_estimator" value="RandomForestRegressor01.zip" ftype="zip"/> 201 <param name="c_estimator" value="RandomForestRegressor01.h5mlm" ftype="h5mlm" />
180 </conditional> 202 </conditional>
181 </repeat> 203 </repeat>
182 <repeat name="base_est_builder"> 204 <repeat name="base_est_builder">
183 <conditional name="estimator_selector"> 205 <conditional name="estimator_selector">
184 <param name="selected_module" value="xgboost"/> 206 <param name="selected_module" value="xgboost" />
185 <param name="selected_estimator" value="XGBRegressor"/> 207 <param name="selected_estimator" value="XGBRegressor" />
186 </conditional> 208 </conditional>
187 </repeat> 209 </repeat>
188 <param name="get_params" value="false"/> 210 <output name="outfile" file="StackingRegressor02.h5mlm" compare="sim_size" delta="5" />
189 <output name="outfile" file="StackingRegressor02.zip" compare="sim_size" delta="5"/>
190 </test> 211 </test>
191 </tests> 212 </tests>
192 <help> 213 <help>
193 <![CDATA[ 214 <![CDATA[
194 This tool wraps Stacking Regression, also called Super Learning, in which different base algorithms train 215 This tool wraps Stacking Regression, also called Super Learning, in which different base algorithms train
195 on the original dataset and predict results respectively, and a second-level `metalearner` then fits on the previous 216 on the original dataset and predict results respectively, and a second-level `metalearner` then fits on the previous
196 prediction results to ensemble a strong learner. 217 prediction results to ensemble a strong learner.
197 Refer to `http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction`_. 218 Refer to https://h2o-release.s3.amazonaws.com/h2o/rel-ueno/2/docs-website/h2o-docs/data-science/stacked-ensembles.html.
198 219
199 .. _`http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction`:
200 http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction
201 220
202 ]]> 221 ]]>
203 </help> 222 </help>
204 <expand macro="sklearn_citation"> 223 <expand macro="sklearn_citation">
205 <expand macro="skrebate_citation"/> 224 <expand macro="skrebate_citation" />
206 <expand macro="xgboost_citation"/> 225 <expand macro="xgboost_citation" />
207 <expand macro="imblearn_citation"/> 226 <expand macro="imblearn_citation" />
208 <citation type="bibtex"> 227 <citation type="bibtex">
209 @article{raschkas_2018_mlxtend, 228 @article{raschkas_2018_mlxtend,
210 author = {Sebastian Raschka}, 229 author = {Sebastian Raschka},
211 title = {MLxtend: Providing machine learning and data science 230 title = {MLxtend: Providing machine learning and data science
212 utilities and extensions to Python’s 231 utilities and extensions to Python’s