sklearn_ensemble: ensemble.xml comparison

comparison ensemble.xml @ 38:142f27ae0806 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 208a8d348e7c7a182cfbe1b6f17868146428a7e2"

author	bgruening
date	Tue, 13 Apr 2021 21:05:37 +0000
parents	c49ad9558f6a
children	fce065687d98

comparison

equal deleted inserted replaced

-:3b4d0b8ff93a
+:142f27ae0806
-<tool id="sklearn_ensemble" name="Ensemble methods" version="@VERSION@">
+<tool id="sklearn_ensemble" name="Ensemble methods" version="@VERSION@" profile="20.05">
 <description>for classification and regression</description>
 <macros>
 <import>main_macros.xml</import>
 </macros>
-<expand macro="python_requirements"/>
+<expand macro="python_requirements" />
-<expand macro="macro_stdio"/>
+<expand macro="macro_stdio" />
 <version_command>echo "@VERSION@"</version_command>
 <command><![CDATA[
 python "$ensemble_script" '$inputs'
 ]]>
 </command>
 <configfiles>
-<inputs name="inputs"/>
+<inputs name="inputs" />
 <configfile name="ensemble_script">
 <![CDATA[
 import json
 import numpy as np
 import pandas
 import pickle
 import sys
 <option value="RandomForestRegressor">Random forest regressor</option>
 <option value="AdaBoostRegressor">Ada boost regressor</option>
 <option value="GradientBoostingRegressor">Gradient Boosting Regressor</option>
 </param>
 <when value="RandomForestClassifier">
-<expand macro="sl_mixed_input"/>
+<expand macro="sl_mixed_input" />
 <section name="options" title="Advanced Options" expanded="False">
-<expand macro="n_estimators" default_value="100"/>
+<expand macro="n_estimators" default_value="100" />
-<expand macro="criterion"/>
+<expand macro="criterion" />
-<expand macro="max_features"/>
+<expand macro="max_features" />
-<expand macro="max_depth"/>
+<expand macro="max_depth" />
-<expand macro="min_samples_split"/>
+<expand macro="min_samples_split" />
-<expand macro="min_samples_leaf"/>
+<expand macro="min_samples_leaf" />
-<expand macro="min_weight_fraction_leaf"/>
+<expand macro="min_weight_fraction_leaf" />
-<expand macro="max_leaf_nodes"/>
+<expand macro="max_leaf_nodes" />
-<expand macro="bootstrap"/>
+<expand macro="bootstrap" />
-<expand macro="warm_start" checked="false"/>
+<expand macro="warm_start" checked="false" />
-<expand macro="random_state"/>
+<expand macro="random_state" />
-<expand macro="oob_score"/>
+<expand macro="oob_score" />
 <!--class_weight=None-->
 </section>
 </when>
 <when value="AdaBoostClassifier">
-<expand macro="sl_mixed_input"/>
+<expand macro="sl_mixed_input" />
 <section name="options" title="Advanced Options" expanded="False">
 <!--base_estimator=None-->
-<expand macro="n_estimators" default_value="50"/>
+<expand macro="n_estimators" default_value="50" />
-<expand macro="learning_rate"/>
+<expand macro="learning_rate" />
-<param argument="algorithm" type="select" label="Boosting algorithm"  help=" ">
+<param argument="algorithm" type="select" label="Boosting algorithm" help=" ">
 <option value="SAMME.R" selected="true">SAMME.R</option>
 <option value="SAMME">SAMME</option>
 </param>
-<expand macro="random_state"/>
+<expand macro="random_state" />
 </section>
 </when>
 <when value="GradientBoostingClassifier">
-<expand macro="sl_mixed_input"/>
+<expand macro="sl_mixed_input" />
 <section name="options" title="Advanced Options" expanded="False">
 <!--base_estimator=None-->
 <param argument="loss" type="select" label="Loss function">
 <option value="deviance" selected="true">deviance - logistic regression with probabilistic outputs</option>
 <option value="exponential">exponential - gradient boosting recovers the AdaBoost algorithm</option>
 </param>
-<expand macro="learning_rate" default_value='0.1'/>
+<expand macro="learning_rate" default_value='0.1' />
-<expand macro="n_estimators" default_value="100" help="The number of boosting stages to perform"/>
+<expand macro="n_estimators" default_value="100" help="The number of boosting stages to perform" />
-<expand macro="max_depth" default_value="3" help="maximum depth of the individual regression estimators"/>
+<expand macro="max_depth" default_value="3" help="maximum depth of the individual regression estimators" />
 <expand macro="criterion2">
 <option value="friedman_mse" selected="true">friedman_mse - mean squared error with improvement score by Friedman</option>
 </expand>
-<expand macro="min_samples_split" type="float"/>
+<expand macro="min_samples_split" type="float" />
-<expand macro="min_samples_leaf" type="float" label="The minimum number of samples required to be at a leaf node"/>
+<expand macro="min_samples_leaf" type="float" label="The minimum number of samples required to be at a leaf node" />
-<expand macro="min_weight_fraction_leaf"/>
+<expand macro="min_weight_fraction_leaf" />
-<expand macro="subsample"/>
+<expand macro="subsample" />
-<expand macro="max_features"/>
+<expand macro="max_features" />
-<expand macro="max_leaf_nodes"/>
+<expand macro="max_leaf_nodes" />
-<expand macro="min_impurity_decrease"/>
+<expand macro="min_impurity_decrease" />
-<expand macro="verbose"/>
+<expand macro="verbose" />
-<expand macro="warm_start" checked="false"/>
+<expand macro="warm_start" checked="false" />
-<expand macro="random_state"/>
+<expand macro="random_state" />
-<expand macro="presort"/>
+<expand macro="presort" />
 </section>
 </when>
 <when value="RandomForestRegressor">
-<expand macro="sl_mixed_input"/>
+<expand macro="sl_mixed_input" />
 <section name="options" title="Advanced Options" expanded="False">
-<expand macro="n_estimators" default_value="100"/>
+<expand macro="n_estimators" default_value="100" />
-<expand macro="criterion2"/>
+<expand macro="criterion2" />
-<expand macro="max_features"/>
+<expand macro="max_features" />
-<expand macro="max_depth"/>
+<expand macro="max_depth" />
-<expand macro="min_samples_split"/>
+<expand macro="min_samples_split" />
-<expand macro="min_samples_leaf"/>
+<expand macro="min_samples_leaf" />
-<expand macro="min_weight_fraction_leaf"/>
+<expand macro="min_weight_fraction_leaf" />
-<expand macro="max_leaf_nodes"/>
+<expand macro="max_leaf_nodes" />
-<expand macro="min_impurity_decrease"/>
+<expand macro="min_impurity_decrease" />
-<expand macro="bootstrap"/>
+<expand macro="bootstrap" />
-<expand macro="oob_score"/>
+<expand macro="oob_score" />
-<expand macro="random_state"/>
+<expand macro="random_state" />
-<expand macro="verbose"/>
+<expand macro="verbose" />
-<expand macro="warm_start" checked="false"/>
+<expand macro="warm_start" checked="false" />
 </section>
 </when>
 <when value="AdaBoostRegressor">
-<expand macro="sl_mixed_input"/>
+<expand macro="sl_mixed_input" />
 <section name="options" title="Advanced Options" expanded="False">
 <!--base_estimator=None-->
-<expand macro="n_estimators" default_value="50"/>
+<expand macro="n_estimators" default_value="50" />
-<expand macro="learning_rate"/>
+<expand macro="learning_rate" />
-<param argument="loss" type="select" label="Loss function"  optional="true" help="Used when updating the weights after each boosting iteration. ">
+<param argument="loss" type="select" label="Loss function" optional="true" help="Used when updating the weights after each boosting iteration. ">
 <option value="linear" selected="true">linear</option>
 <option value="square">square</option>
 <option value="exponential">exponential</option>
 </param>
-<expand macro="random_state"/>
+<expand macro="random_state" />
 </section>
 </when>
 <when value="GradientBoostingRegressor">
-<expand macro="sl_mixed_input"/>
+<expand macro="sl_mixed_input" />
 <section name="options" title="Advanced Options" expanded="False">
 <param argument="loss" type="select" label="Loss function">
 <option value="ls" selected="true">ls - least squares regression</option>
 <option value="lad">lad - least absolute deviation</option>
 <option value="huber">huber - combination of least squares regression and least absolute deviation</option>
 <option value="quantile">quantile - use alpha to specify the quantile</option>
 </param>
-<expand macro="learning_rate" default_value="0.1"/>
+<expand macro="learning_rate" default_value="0.1" />
-<expand macro="n_estimators" default_value="100" help="The number of boosting stages to perform"/>
+<expand macro="n_estimators" default_value="100" help="The number of boosting stages to perform" />
-<expand macro="max_depth" default_value="3" help="maximum depth of the individual regression estimators"/>
+<expand macro="max_depth" default_value="3" help="maximum depth of the individual regression estimators" />
 <expand macro="criterion2">
 <option value="friedman_mse" selected="true">friedman_mse - mean squared error with improvement score by Friedman</option>
 </expand>
-<expand macro="min_samples_split" type="float"/>
+<expand macro="min_samples_split" type="float" />
-<expand macro="min_samples_leaf" type="float" label="The minimum number of samples required to be at a leaf node"/>
+<expand macro="min_samples_leaf" type="float" label="The minimum number of samples required to be at a leaf node" />
-<expand macro="min_weight_fraction_leaf"/>
+<expand macro="min_weight_fraction_leaf" />
-<expand macro="subsample"/>
+<expand macro="subsample" />
-<expand macro="max_features"/>
+<expand macro="max_features" />
-<expand macro="max_leaf_nodes"/>
+<expand macro="max_leaf_nodes" />
-<expand macro="min_impurity_decrease"/>
+<expand macro="min_impurity_decrease" />
 <param argument="alpha" type="float" value="0.9" label="alpha" help="The alpha-quantile of the huber loss function and the quantile loss function" />
 <!--base_estimator=None-->
-<expand macro="verbose"/>
+<expand macro="verbose" />
-<expand macro="warm_start" checked="false"/>
+<expand macro="warm_start" checked="false" />
-<expand macro="random_state"/>
+<expand macro="random_state" />
-<expand macro="presort"/>
+<expand macro="presort" />
 </section>
 </when>
 </expand>
 </inputs>
-<expand macro="output"/>
+<expand macro="output" />
 <tests>
 <test>
-<param name="infile1" value="train.tabular" ftype="tabular"/>
+<param name="infile1" value="train.tabular" ftype="tabular" />
-<param name="infile2" value="train.tabular" ftype="tabular"/>
+<param name="infile2" value="train.tabular" ftype="tabular" />
-<param name="col1" value="1,2,3,4"/>
+<param name="col1" value="1,2,3,4" />
-<param name="col2" value="5"/>
+<param name="col2" value="5" />
-<param name="selected_task" value="train"/>
+<param name="selected_task" value="train" />
-<param name="selected_algorithm" value="RandomForestClassifier"/>
+<param name="selected_algorithm" value="RandomForestClassifier" />
-<param name="random_state" value="10"/>
+<param name="random_state" value="10" />
-<output name="outfile_fit" file="rfc_model01" compare="sim_size" delta="5"/>
+<output name="outfile_fit" file="rfc_model01" compare="sim_size" delta="5" />
 </test>
 <test>
-<param name="infile_model" value="rfc_model01" ftype="zip"/>
+<param name="infile_model" value="rfc_model01" ftype="zip" />
-<param name="infile_data" value="test.tabular" ftype="tabular"/>
+<param name="infile_data" value="test.tabular" ftype="tabular" />
-<param name="selected_task" value="load"/>
+<param name="selected_task" value="load" />
-<output name="outfile_predict" file="rfc_result01"/>
+<output name="outfile_predict" file="rfc_result01" />
 </test>
 <test>
-<param name="infile1" value="regression_train.tabular" ftype="tabular"/>
+<param name="infile1" value="regression_train.tabular" ftype="tabular" />
-<param name="infile2" value="regression_train.tabular" ftype="tabular"/>
+<param name="infile2" value="regression_train.tabular" ftype="tabular" />
-<param name="col1" value="1,2,3,4,5"/>
+<param name="col1" value="1,2,3,4,5" />
-<param name="col2" value="6"/>
+<param name="col2" value="6" />
-<param name="selected_task" value="train"/>
+<param name="selected_task" value="train" />
-<param name="selected_algorithm" value="RandomForestRegressor"/>
+<param name="selected_algorithm" value="RandomForestRegressor" />
-<param name="random_state" value="10"/>
+<param name="random_state" value="10" />
-<output name="outfile_fit" file="rfr_model01" compare="sim_size" delta="5"/>
+<output name="outfile_fit" file="rfr_model01" compare="sim_size" delta="5" />
 </test>
 <test>
-<param name="infile_model" value="rfr_model01" ftype="zip"/>
+<param name="infile_model" value="rfr_model01" ftype="zip" />
-<param name="infile_data" value="regression_test.tabular" ftype="tabular"/>
+<param name="infile_data" value="regression_test.tabular" ftype="tabular" />
-<param name="selected_task" value="load"/>
+<param name="selected_task" value="load" />
-<output name="outfile_predict" file="rfr_result01"/>
+<output name="outfile_predict" file="rfr_result01" />
 </test>
 <test>
-<param name="infile1" value="regression_X.tabular" ftype="tabular"/>
+<param name="infile1" value="regression_X.tabular" ftype="tabular" />
-<param name="infile2" value="regression_y.tabular" ftype="tabular"/>
+<param name="infile2" value="regression_y.tabular" ftype="tabular" />
-<param name="header1" value="True"/>
+<param name="header1" value="True" />
-<param name="selected_column_selector_option" value="all_columns"/>
+<param name="selected_column_selector_option" value="all_columns" />
-<param name="header2" value="True"/>
+<param name="header2" value="True" />
-<param name="col2" value="1"/>
+<param name="col2" value="1" />
-<param name="selected_task" value="train"/>
+<param name="selected_task" value="train" />
-<param name="selected_algorithm" value="GradientBoostingRegressor"/>
+<param name="selected_algorithm" value="GradientBoostingRegressor" />
-<param name="max_features" value="number_input"/>
+<param name="max_features" value="number_input" />
-<param name="num_max_features" value="0.5"/>
+<param name="num_max_features" value="0.5" />
-<param name="random_state" value="42"/>
+<param name="random_state" value="42" />
-<output name="outfile_fit" file="gbr_model01" compare="sim_size" delta="5"/>
+<output name="outfile_fit" file="gbr_model01" compare="sim_size" delta="5" />
 </test>
 <test>
-<param name="infile_model" value="gbr_model01" ftype="zip"/>
+<param name="infile_model" value="gbr_model01" ftype="zip" />
-<param name="infile_data" value="regression_test_X.tabular" ftype="tabular"/>
+<param name="infile_data" value="regression_test_X.tabular" ftype="tabular" />
-<param name="selected_task" value="load"/>
+<param name="selected_task" value="load" />
-<param name="header" value="True"/>
+<param name="header" value="True" />
-<output name="outfile_predict" file="gbr_prediction_result01.tabular"/>
+<output name="outfile_predict" file="gbr_prediction_result01.tabular" />
 </test>
 <test>
-<param name="infile1" value="train.tabular" ftype="tabular"/>
+<param name="infile1" value="train.tabular" ftype="tabular" />
-<param name="infile2" value="train.tabular" ftype="tabular"/>
+<param name="infile2" value="train.tabular" ftype="tabular" />
-<param name="col1" value="1,2,3,4"/>
+<param name="col1" value="1,2,3,4" />
-<param name="col2" value="5"/>
+<param name="col2" value="5" />
-<param name="selected_task" value="train"/>
+<param name="selected_task" value="train" />
-<param name="selected_algorithm" value="GradientBoostingClassifier"/>
+<param name="selected_algorithm" value="GradientBoostingClassifier" />
-<output name="outfile_fit" file="gbc_model01" compare="sim_size" delta="5"/>
+<output name="outfile_fit" file="gbc_model01" compare="sim_size" delta="5" />
 </test>
 <test>
-<param name="infile_model" value="gbc_model01" ftype="zip"/>
+<param name="infile_model" value="gbc_model01" ftype="zip" />
-<param name="infile_data" value="test.tabular" ftype="tabular"/>
+<param name="infile_data" value="test.tabular" ftype="tabular" />
-<param name="selected_task" value="load"/>
+<param name="selected_task" value="load" />
-<output name="outfile_predict" file="gbc_result01"/>
+<output name="outfile_predict" file="gbc_result01" />
 </test>
 <test>
-<param name="infile1" value="train.tabular" ftype="tabular"/>
+<param name="infile1" value="train.tabular" ftype="tabular" />
-<param name="infile2" value="train.tabular" ftype="tabular"/>
+<param name="infile2" value="train.tabular" ftype="tabular" />
-<param name="col1" value="1,2,3,4"/>
+<param name="col1" value="1,2,3,4" />
-<param name="col2" value="5"/>
+<param name="col2" value="5" />
-<param name="selected_task" value="train"/>
+<param name="selected_task" value="train" />
-<param name="selected_algorithm" value="AdaBoostClassifier"/>
+<param name="selected_algorithm" value="AdaBoostClassifier" />
-<param name="random_state" value="10"/>
+<param name="random_state" value="10" />
-<output name="outfile_fit" file="abc_model01" compare="sim_size" delta="5"/>
+<output name="outfile_fit" file="abc_model01" compare="sim_size" delta="5" />
 </test>
 <test>
-<param name="infile_model" value="abc_model01" ftype="zip"/>
+<param name="infile_model" value="abc_model01" ftype="zip" />
-<param name="infile_data" value="test.tabular" ftype="tabular"/>
+<param name="infile_data" value="test.tabular" ftype="tabular" />
-<param name="selected_task" value="load"/>
+<param name="selected_task" value="load" />
-<output name="outfile_predict" file="abc_result01"/>
+<output name="outfile_predict" file="abc_result01" />
 </test>
 <test>
-<param name="infile1" value="regression_train.tabular" ftype="tabular"/>
+<param name="infile1" value="regression_train.tabular" ftype="tabular" />
-<param name="infile2" value="regression_train.tabular" ftype="tabular"/>
+<param name="infile2" value="regression_train.tabular" ftype="tabular" />
-<param name="col1" value="1,2,3,4,5"/>
+<param name="col1" value="1,2,3,4,5" />
-<param name="col2" value="6"/>
+<param name="col2" value="6" />
-<param name="selected_task" value="train"/>
+<param name="selected_task" value="train" />
-<param name="selected_algorithm" value="AdaBoostRegressor"/>
+<param name="selected_algorithm" value="AdaBoostRegressor" />
-<param name="random_state" value="10"/>
+<param name="random_state" value="10" />
-<output name="outfile_fit" file="abr_model01" compare="sim_size" delta="5"/>
+<output name="outfile_fit" file="abr_model01" compare="sim_size" delta="5" />
 </test>
 <test>
-<param name="infile_model" value="abr_model01" ftype="zip"/>
+<param name="infile_model" value="abr_model01" ftype="zip" />
-<param name="infile_data" value="regression_test.tabular" ftype="tabular"/>
+<param name="infile_data" value="regression_test.tabular" ftype="tabular" />
-<param name="selected_task" value="load"/>
+<param name="selected_task" value="load" />
-<output name="outfile_predict" file="abr_result01"/>
+<output name="outfile_predict" file="abr_result01" />
 </test>
 </tests>
 <help><![CDATA[
 ***What it does***
 The goal of ensemble methods is to combine the predictions of several base estimators built with a given learning algorithm in order to improve generalizability / robustness over a single estimator. This tool offers three families of ensemble algorithms for classification and regression: random forests, AdaBoost and gradient boosting, all of which are based on the sklearn.ensemble library from Scikit-learn. Here you can find out about the input, output and methods presented in the tool. For information about ensemble methods and parameter settings please refer to `Scikit-learn ensemble`_.
 **3 - Prediction output**
 The tool predicts the class labels (or, for regressors, the target values) for new samples and adds them as the last column to the prediction dataset. The new dataset is then output as a tabular file. The prediction output format should look like the training dataset.
-]]></help>
+]]>    </help>
-<expand macro="sklearn_citation"/>
+<expand macro="sklearn_citation" />
 </tool>

Mercurial > repos > bgruening > sklearn_ensemble

comparison ensemble.xml @ 38:142f27ae0806 draft