diff stacking_ensembles.xml @ 3:0a1812986bc3 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author bgruening
date Wed, 09 Aug 2023 11:10:37 +0000
parents 38c4f8a98038
children
line wrap: on
line diff
--- a/stacking_ensembles.xml	Mon Dec 16 10:07:37 2019 +0000
+++ b/stacking_ensembles.xml	Wed Aug 09 11:10:37 2023 +0000
@@ -1,10 +1,44 @@
-<tool id="stacking_ensemble_models" name="Stacking Ensembles" version="@VERSION@">
+<tool id="stacking_ensemble_models" name="Stacking Ensembles" version="@VERSION@" profile="@PROFILE@">
     <description>builds stacking, voting ensemble models with numerous base options</description>
     <macros>
         <import>main_macros.xml</import>
+        <macro name="stacking_ensemble_inputs"> <!-- "Advanced Options" section expanded by the mlxtend stacking branches of this tool; params passed by the caller are inserted at the yield, ahead of the two boolean flags common to all of them. -->
+            <section name="options" title="Advanced Options" expanded="false">
+                <yield />
+                <param argument="use_features_in_secondary" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" />
+                <param argument="store_train_meta_features" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" />
+            </section>
+        </macro>
+        <macro name="stacking_base_estimator"> <!-- Estimator picker: extends the imported estimator_module_options / estimator_suboptions macros with a "custom_estimator" choice that loads an estimator or pipeline from an h5mlm dataset. -->
+            <conditional name="estimator_selector">
+                <param name="selected_module" type="select" label="Choose the module that contains target estimator:" >
+                    <expand macro="estimator_module_options">
+                        <option value="custom_estimator">Load a custom estimator</option>
+                    </expand>
+                </param>
+                <expand macro="estimator_suboptions">
+                    <when value="custom_estimator">
+                        <param name="c_estimator" type="data" format="h5mlm" label="Choose the dataset containing the custom estimator or pipeline" />
+                    </when>
+                </expand>
+            </conditional>
+        </macro>
+        <macro name="stacking_voting_weights"> <!-- "Advanced Options" section for the voting branches: a free-text `weights` list, followed by any params the caller passes in at the yield. -->
+            <section name="options" title="Advanced Options" expanded="false">
+                <param argument="weights" type="text" value="[]" optional="true" help="Sequence of weights (float or int). Uses uniform weights if None (`[]`).">
+                    <sanitizer> <!-- adds [ and ] to the default valid character set so a list such as [1, 2] can be entered -->
+                        <valid initial="default">
+                            <add value="[" />
+                            <add value="]" />
+                        </valid>
+                    </sanitizer>
+                </param>
+                <yield />
+            </section>
+        </macro>
     </macros>
-    <expand macro="python_requirements"/>
-    <expand macro="macro_stdio"/>
+    <expand macro="python_requirements" />
+    <expand macro="macro_stdio" />
     <version_command>echo "$ENSEMBLE_VERSION"</version_command>
     <command>
         <![CDATA[
@@ -30,9 +64,6 @@
             --meta '${algo_selection.meta_estimator.estimator_selector.c_estimator}'
             #end if
             #end if
-            #if $get_params
-            --outfile_params '$outfile_params'
-            #end if
         ]]>
     </command>
     <configfiles>
@@ -54,139 +85,129 @@
                         <option value="hard" selected="true">hard</option>
                         <option value="soft">soft</option>
                     </param>
-                    <param argument="flatten_transform" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" help=""/>
+                    <param argument="flatten_transform" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" help="" />
                 </expand>
             </when>
             <when value="sklearn.ensemble_VotingRegressor">
-                <expand macro="stacking_voting_weights"/>
+                <expand macro="stacking_voting_weights" />
             </when>
             <when value="mlxtend.classifier_StackingCVClassifier">
                 <expand macro="stacking_ensemble_inputs">
-                    <expand macro="cv_reduced"/>
-                    <expand macro="shuffle" label="shuffle"/>
-                    <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data."/>
-                    <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/>
+                    <expand macro="cv_reduced" />
+                    <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" />
+                    <param argument="drop_last_proba" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" />
                 </expand>
                 <section name="meta_estimator" title="Meta Estimator" expanded="true">
-                    <expand macro="stacking_base_estimator"/>
+                    <expand macro="stacking_base_estimator" />
                 </section>
             </when>
             <when value="mlxtend.classifier_StackingClassifier">
                 <expand macro="stacking_ensemble_inputs">
-                    <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/>
-                    <param argument="average_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/>
+                    <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" />
+                    <param argument="average_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" />
+                    <param argument="drop_last_proba" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" />
                 </expand>
                 <section name="meta_estimator" title="Meta Estimator" expanded="true">
-                    <expand macro="stacking_base_estimator"/>
+                    <expand macro="stacking_base_estimator" />
                 </section>
             </when>
             <when value="mlxtend.regressor_StackingCVRegressor">
                 <expand macro="stacking_ensemble_inputs">
-                    <expand macro="cv_reduced"/>
+                    <expand macro="cv_reduced" />
                     <!--TODO support group splitters. Hint: `groups` is a fit_param-->
-                    <expand macro="shuffle" label="shuffle"/>
-                    <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data."/>
-                    <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true"/>
+                    <expand macro="shuffle" label="shuffle" />
+                    <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data." />
+                    <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" />
                 </expand>
                 <section name="meta_estimator" title="Meta Estimator" expanded="true">
-                    <expand macro="stacking_base_estimator"/>
+                    <expand macro="stacking_base_estimator" />
                 </section>
             </when>
             <when value="mlxtend.regressor_StackingRegressor">
                 <expand macro="stacking_ensemble_inputs">
-                    <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true"/>
+                    <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" />
                 </expand>
                 <section name="meta_estimator" title="Meta Estimator" expanded="true">
-                    <expand macro="stacking_base_estimator"/>
+                    <expand macro="stacking_base_estimator" />
                 </section>
             </when>
         </conditional>
         <repeat name="base_est_builder" min="1" max="20" title="Base Estimator">
-            <expand macro="stacking_base_estimator"/>
-            <!--param name="base_estimator" type="data" format="zip,json" label="Select the dataset containing base estimator" help="One estimator at a time."/-->
+            <expand macro="stacking_base_estimator" />
         </repeat>
-        <!--param name="meta_estimator" type="data" format="zip,json" label="Select the dataset containing the Meta estimator"/-->
-        <param name="get_params" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Output parameters for searchCV?"
-                help="Optional. Tunable parameters could be obtained through `estimator_attributes` tool."/>
     </inputs>
     <outputs>
-        <data format="zip" name="outfile" label="${algo_selection.estimator_type} on ${on_string}"/>
-        <data format="tabular" name="outfile_params" label="get_params for ${algo_selection.estimator_type}">
-            <filter>get_params</filter>
-        </data>
+        <data format="h5mlm" name="outfile" label="${algo_selection.estimator_type} on ${on_string}" />
     </outputs>
     <tests>
         <test>
             <conditional name="algo_selection">
-                <param name="estimator_type" value="sklearn.ensemble_VotingClassifier"/>
+                <param name="estimator_type" value="sklearn.ensemble_VotingClassifier" />
                 <section name="options">
-                    <param name="weights" value="[1, 2]"/>
+                    <param name="weights" value="[1, 2]" />
                 </section>
             </conditional>
             <repeat name="base_est_builder">
                 <conditional name="estimator_selector">
-                    <param name="selected_module" value="svm"/>
-                    <param name="selected_estimator" value="SVC"/>
+                    <param name="selected_module" value="svm" />
+                    <param name="selected_estimator" value="SVC" />
                 </conditional>
             </repeat>
             <repeat name="base_est_builder">
                 <conditional name="estimator_selector">
-                    <param name="selected_module" value="xgboost"/>
-                    <param name="selected_estimator" value="XGBClassifier"/>
+                    <param name="selected_module" value="xgboost" />
+                    <param name="selected_estimator" value="XGBClassifier" />
                 </conditional>
             </repeat>
-            <param name="get_params" value="false"/>
-            <output name="outfile" file="StackingVoting03.zip" compare="sim_size" delta="5"/>
+            <output name="outfile" file="StackingVoting03.h5mlm" compare="sim_size" delta="5" />
         </test>
         <test>
             <conditional name="algo_selection">
-                <param name="estimator_type" value="mlxtend.regressor_StackingCVRegressor"/>
+                <param name="estimator_type" value="mlxtend.regressor_StackingCVRegressor" />
                 <section name="meta_estimator">
                     <conditional name="estimator_selector">
-                        <param name="selected_module" value="custom_estimator"/>
-                        <param name="c_estimator" value="LinearRegression01.zip" ftype="zip"/>
+                        <param name="selected_module" value="custom_estimator" />
+                        <param name="c_estimator" value="LinearRegression01.h5mlm" ftype="h5mlm" />
                     </conditional>
                 </section>
             </conditional>
             <repeat name="base_est_builder">
                 <conditional name="estimator_selector">
-                    <param name="selected_module" value="custom_estimator"/>
-                    <param name="c_estimator" value="RandomForestRegressor01.zip" ftype="zip"/>
+                    <param name="selected_module" value="custom_estimator" />
+                    <param name="c_estimator" value="RandomForestRegressor01.h5mlm" ftype="h5mlm" />
                 </conditional>
             </repeat>
             <repeat name="base_est_builder">
                 <conditional name="estimator_selector">
-                    <param name="selected_module" value="custom_estimator"/>
-                    <param name="c_estimator" value="XGBRegressor01.zip" ftype="zip"/>
+                    <param name="selected_module" value="custom_estimator" />
+                    <param name="c_estimator" value="XGBRegressor01.h5mlm" ftype="h5mlm" />
                 </conditional>
             </repeat>
-            <param name="get_params" value="false"/>
-            <output name="outfile" file="StackingCVRegressor01.zip" compare="sim_size" delta="5"/>
+            <output name="outfile" file="StackingCVRegressor01.h5mlm" compare="sim_size" delta="5" />
         </test>
         <test>
             <conditional name="algo_selection">
-                <param name="estimator_type" value="mlxtend.regressor_StackingRegressor"/>
+                <param name="estimator_type" value="mlxtend.regressor_StackingRegressor" />
                 <section name="meta_estimator">
                     <conditional name="estimator_selector">
-                        <param name="selected_module" value="svm"/>
-                        <param name="selected_estimator" value="SVR"/>
+                        <param name="selected_module" value="svm" />
+                        <param name="selected_estimator" value="SVR" />
                     </conditional>
                 </section>
             </conditional>
             <repeat name="base_est_builder">
                 <conditional name="estimator_selector">
-                    <param name="selected_module" value="custom_estimator"/>
-                    <param name="c_estimator" value="RandomForestRegressor01.zip" ftype="zip"/>
+                    <param name="selected_module" value="custom_estimator" />
+                    <param name="c_estimator" value="RandomForestRegressor01.h5mlm" ftype="h5mlm" />
                 </conditional>
             </repeat>
             <repeat name="base_est_builder">
                 <conditional name="estimator_selector">
-                    <param name="selected_module" value="xgboost"/>
-                    <param name="selected_estimator" value="XGBRegressor"/>
+                    <param name="selected_module" value="xgboost" />
+                    <param name="selected_estimator" value="XGBRegressor" />
                 </conditional>
             </repeat>
-            <param name="get_params" value="false"/>
-            <output name="outfile" file="StackingRegressor02.zip" compare="sim_size" delta="5"/>
+            <output name="outfile" file="StackingRegressor02.h5mlm" compare="sim_size" delta="5" />
         </test>
     </tests>
     <help>
@@ -194,17 +215,15 @@
 This tool wraps Stacking Regression, also called Super Learning, in which different base algorithms train
 on the original dataset and predict results respectively, a second level of `metalearner` fits on the previous
 prediction results to ensemble a strong learner.
-Refer to `http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction`_.
+Refer to https://h2o-release.s3.amazonaws.com/h2o/rel-ueno/2/docs-website/h2o-docs/data-science/stacked-ensembles.html.
 
-.. _`http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction`:
- http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction
 
         ]]>
     </help>
     <expand macro="sklearn_citation">
-        <expand macro="skrebate_citation"/>
-        <expand macro="xgboost_citation"/>
-        <expand macro="imblearn_citation"/>
+        <expand macro="skrebate_citation" />
+        <expand macro="xgboost_citation" />
+        <expand macro="imblearn_citation" />
         <citation type="bibtex">
             @article{raschkas_2018_mlxtend,
                 author       = {Sebastian Raschka},