Mercurial > repos > bgruening > sklearn_feature_selection
comparison feature_selection.xml @ 2:5a06c81f044d draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 79fe42239dcf077b13f85cbcd6c6e30d7e1e4832
| author | bgruening |
|---|---|
| date | Tue, 22 May 2018 19:27:45 -0400 |
| parents | 2bbacfaadb5c |
| children | 0dc80ab8ec21 |
Comparison legend: equal | deleted | inserted | replaced
| 1:f017e93ceda7 | 2:5a06c81f044d |
|---|---|
| 1 <tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@"> | 1 <tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@.1"> |
| 2 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description> | 2 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description> |
| 3 <macros> | 3 <macros> |
| 4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
| 5 </macros> | 5 </macros> |
| 6 <expand macro="python_requirements"/> | 6 <expand macro="python_requirements"/> |
| 26 @COLUMNS_FUNCTION@ | 26 @COLUMNS_FUNCTION@ |
| 27 | 27 |
| 28 input_json_path = sys.argv[1] | 28 input_json_path = sys.argv[1] |
| 29 params = json.load(open(input_json_path, "r")) | 29 params = json.load(open(input_json_path, "r")) |
| 30 | 30 |
| 31 ## Read features | |
| 32 features_has_header = params["input_options"]["header1"] | |
| 31 input_type = params["input_options"]["selected_input"] | 33 input_type = params["input_options"]["selected_input"] |
| 32 if input_type=="tabular": | 34 if input_type=="tabular": |
| 35 header = 'infer' if features_has_header else None | |
| 33 header = 'infer' if params["input_options"]["header1"] else None | 36 header = 'infer' if params["input_options"]["header1"] else None |
| 34 X = read_columns( | 37 X, input_df = read_columns( |
| 35 "$input_options.infile1", | 38 "$input_options.infile1", |
| 36 "$input_options.col1", | 39 "$input_options.col1", |
| 40 return_df = True, | |
| 37 sep='\t', | 41 sep='\t', |
| 38 header=header, | 42 header=header, |
| 39 parse_dates=True | 43 parse_dates=True |
| 40 ) | 44 ) |
| 41 else: | 45 else: |
| 42 X = mmread(open("$input_options.infile1", 'r')) | 46 X = mmread(open("$input_options.infile1", 'r')) |
| 43 | 47 |
| 48 ## Read labels | |
| 44 header = 'infer' if params["input_options"]["header2"] else None | 49 header = 'infer' if params["input_options"]["header2"] else None |
| 45 y = read_columns( | 50 y = read_columns( |
| 46 "$input_options.infile2", | 51 "$input_options.infile2", |
| 47 "$input_options.col2", | 52 "$input_options.col2", |
| 48 sep='\t', | 53 sep='\t', |
| 49 header=header, | 54 header=header, |
| 50 parse_dates=True | 55 parse_dates=True |
| 51 ) | 56 ) |
| 52 y=y.ravel() | 57 y=y.ravel() |
| 53 | 58 |
| 59 ## Create feature selector | |
| 54 selector = params["feature_selection_algorithms"]["selected_algorithm"] | 60 selector = params["feature_selection_algorithms"]["selected_algorithm"] |
| 55 selector = getattr(sklearn.feature_selection, selector) | 61 selector = getattr(sklearn.feature_selection, selector) |
| 56 options = params["feature_selection_algorithms"]["options"] | 62 options = params["feature_selection_algorithms"]["options"] |
| 57 | 63 |
| 58 #if $feature_selection_algorithms.selected_algorithm == 'SelectFromModel': | 64 if params['feature_selection_algorithms']['selected_algorithm'] == 'SelectFromModel': |
| 59 if not options['threshold'] or options['threshold'] == 'None': | 65 if not options['threshold'] or options['threshold'] == 'None': |
| 60 options['threshold'] = None | 66 options['threshold'] = None |
| 61 #if $feature_selection_algorithms.extra_estimator.has_estimator == 'no_load': | 67 if 'extra_estimator' in params['feature_selection_algorithms'] and params['feature_selection_algorithms']['extra_estimator']['has_estimator'] == 'no_load': |
| 62 fitted_estimator = pickle.load(open("$feature_selection_algorithms.extra_estimator.fitted_estimator", 'r')) | 68 fitted_estimator = pickle.load(open("params['feature_selection_algorithms']['extra_estimator']['fitted_estimator']", 'r')) |
| 63 new_selector = selector(fitted_estimator, prefit=True, **options) | 69 new_selector = selector(fitted_estimator, prefit=True, **options) |
| 64 #else: | 70 else: |
| 65 estimator=params["feature_selection_algorithms"]["estimator"] | 71 estimator=params["feature_selection_algorithms"]["estimator"] |
| 66 if params["feature_selection_algorithms"]["extra_estimator"]["has_estimator"]=='no': | 72 if params["feature_selection_algorithms"]["extra_estimator"]["has_estimator"]=='no': |
| 67 estimator=params["feature_selection_algorithms"]["extra_estimator"]["new_estimator"] | 73 estimator=params["feature_selection_algorithms"]["extra_estimator"]["new_estimator"] |
| 68 estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) | 74 estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) |
| 69 new_selector = selector(estimator, **options) | 75 new_selector = selector(estimator, **options) |
| 70 new_selector.fit(X, y) | 76 new_selector.fit(X, y) |
| 71 #end if | 77 |
| 72 | 78 elif params['feature_selection_algorithms']['selected_algorithm'] in ['RFE', 'RFECV']: |
| 73 #elif $feature_selection_algorithms.selected_algorithm in ['RFE', 'RFECV']: | 79 if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'): |
| 74 if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'): | 80 options['scoring'] = None |
| 75 options['scoring'] = None | 81 estimator=params["feature_selection_algorithms"]["estimator"] |
| 76 estimator=params["feature_selection_algorithms"]["estimator"] | 82 if params["feature_selection_algorithms"]["extra_estimator"]["has_estimator"]=='no': |
| 77 if params["feature_selection_algorithms"]["extra_estimator"]["has_estimator"]=='no': | 83 estimator=params["feature_selection_algorithms"]["extra_estimator"]["new_estimator"] |
| 78 estimator=params["feature_selection_algorithms"]["extra_estimator"]["new_estimator"] | 84 estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) |
| 79 estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) | 85 new_selector = selector(estimator, **options) |
| 80 new_selector = selector(estimator, **options) | 86 new_selector.fit(X, y) |
| 81 new_selector.fit(X, y) | 87 |
| 82 | 88 elif params['feature_selection_algorithms']['selected_algorithm'] == "VarianceThreshold": |
| 83 #elif $feature_selection_algorithms.selected_algorithm == "VarianceThreshold": | 89 new_selector = selector(**options) |
| 84 new_selector = selector(**options) | 90 new_selector.fit(X, y) |
| 85 new_selector.fit(X, y) | 91 |
| 86 | 92 else: |
| 87 #else: | 93 score_func = params["feature_selection_algorithms"]["score_func"] |
| 88 score_func = params["feature_selection_algorithms"]["score_func"] | 94 score_func = getattr(sklearn.feature_selection, score_func) |
| 89 score_func = getattr(sklearn.feature_selection, score_func) | 95 new_selector = selector(score_func, **options) |
| 90 new_selector = selector(score_func, **options) | 96 new_selector.fit(X, y) |
| 91 new_selector.fit(X, y) | 97 |
| 92 #end if | 98 ## Transform to select features |
| 93 | 99 selected_names = None |
| 94 #if $select_methods.selected_method == "fit_transform": | 100 if "$select_methods.selected_method" == "fit_transform": |
| 95 res = new_selector.transform(X) | 101 res = new_selector.transform(X) |
| 96 | 102 if features_has_header: |
| 97 #else: | 103 selected_names = input_df.columns[new_selector.get_support(indices=True)] |
| 98 res = new_selector.get_support(params["select_methods"]["indices"]) | 104 else: |
| 99 #end if | 105 res = new_selector.get_support(params["select_methods"]["indices"]) |
| 100 | 106 |
| 101 res = pandas.DataFrame(res) | 107 res = pandas.DataFrame(res, columns = selected_names) |
| 102 res.to_csv(path_or_buf="$outfile", sep='\t', index=False) | 108 res.to_csv(path_or_buf="$outfile", sep='\t', index=False) |
| 103 | 109 |
| 104 | 110 |
| 105 ]]> | 111 ]]> |
| 106 </configfile> | 112 </configfile> |
| 107 </configfiles> | 113 </configfiles> |
| 108 <inputs> | 114 <inputs> |
| 109 <conditional name="feature_selection_algorithms"> | 115 <expand macro="feature_selection_all" /> |
| 110 <param name="selected_algorithm" type="select" label="Select a feature selection algorithm"> | |
| 111 <option value="SelectFromModel" selected="true">SelectFromModel - Meta-transformer for selecting features based on importance weights</option> | |
| 112 <option value="GenericUnivariateSelect" selected="true">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option> | |
| 113 <option value="SelectPercentile">SelectPercentile - Select features according to a percentile of the highest scores</option> | |
| 114 <option value="SelectKBest">SelectKBest - Select features according to the k highest scores</option> | |
| 115 <option value="SelectFpr">SelectFpr - Filter: Select the p-values below alpha based on a FPR test</option> | |
| 116 <option value="SelectFdr">SelectFdr - Filter: Select the p-values for an estimated false discovery rate</option> | |
| 117 <option value="SelectFwe">SelectFwe - Filter: Select the p-values corresponding to Family-wise error rate</option> | |
| 118 <option value="RFE">RFE - Feature ranking with recursive feature elimination</option> | |
| 119 <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option> | |
| 120 <option value="VarianceThreshold">VarianceThreshold - Feature selector that removes all low-variance features</option> | |
| 121 <!--option value="chi2">Compute chi-squared stats between each non-negative feature and class</option--> | |
| 122 <!--option value="f_classif">Compute the ANOVA F-value for the provided sample</option--> | |
| 123 <!--option value="f_regression">Univariate linear regression tests</option--> | |
| 124 <!--option value="mutual_info_classif">Estimate mutual information for a discrete target variable</option--> | |
| 125 <!--option value="mutual_info_regression">Estimate mutual information for a continuous target variable</option--> | |
| 126 </param> | |
| 127 <when value="SelectFromModel"> | |
| 128 <expand macro="feature_selection_estimator" /> | |
| 129 <conditional name="extra_estimator"> | |
| 130 <expand macro="feature_selection_extra_estimator" > | |
| 131 <option value="no_load">No, I will load a prefitted estimator</option> | |
| 132 </expand> | |
| 133 <expand macro="feature_selection_estimator_choices" > | |
| 134 <when value="no_load"> | |
| 135 <param name="fitted_estimator" type="data" format='zip' label="Load a prefitted estimator" /> | |
| 136 </when> | |
| 137 </expand> | |
| 138 </conditional> | |
| 139 <section name="options" title="Other Options" expanded="True"> | |
| 140 <param argument="threshold" type="text" value="" optional="true" label="threshold" help="The threshold value to use for feature selection. e.g. 'mean', 'median', '1.25*mean'." /> | |
| 141 <param argument="norm_order" type="integer" value="1" label="norm_order" help="Order of the norm used to filter the vectors of coefficients below threshold in the case where the coef_ attribute of the estimator is of dimension 2. " /> | |
| 142 </section> | |
| 143 </when> | |
| 144 <when value="GenericUnivariateSelect"> | |
| 145 <expand macro="feature_selection_score_function" /> | |
| 146 <section name="options" title="Other Options" expanded="True"> | |
| 147 <param argument="mode" type="select" label="Feature selection mode"> | |
| 148 <option value="percentile">percentile</option> | |
| 149 <option value="k_best">k_best</option> | |
| 150 <option value="fpr">fpr</option> | |
| 151 <option value="fdr">fdr</option> | |
| 152 <option value="fwe">fwe</option> | |
| 153 </param> | |
| 154 <param argument="param" type="float" value="" optional="true" label="Parameter of the corresponding mode" help="float or int depending on the feature selection mode" /> | |
| 155 </section> | |
| 156 </when> | |
| 157 <when value="SelectPercentile"> | |
| 158 <expand macro="feature_selection_score_function" /> | |
| 159 <section name="options" title="Other Options" expanded="True"> | |
| 160 <param argument="percentile" type="integer" value="10" optional="True" label="Percent of features to keep" /> | |
| 161 </section> | |
| 162 </when> | |
| 163 <when value="SelectKBest"> | |
| 164 <expand macro="feature_selection_score_function" /> | |
| 165 <section name="options" title="Other Options" expanded="True"> | |
| 166 <param argument="k" type="integer" value="10" optional="True" label="Number of top features to select" help="No 'all' option is supported." /> | |
| 167 </section> | |
| 168 </when> | |
| 169 <when value="SelectFpr"> | |
| 170 <expand macro="feature_selection_score_function" /> | |
| 171 <section name="options" title="Other Options" expanded="True"> | |
| 172 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest p-value for features to be kept."/> | |
| 173 </section> | |
| 174 </when> | |
| 175 <when value="SelectFdr"> | |
| 176 <expand macro="feature_selection_score_function" /> | |
| 177 <section name="options" title="Other Options" expanded="True"> | |
| 178 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/> | |
| 179 </section> | |
| 180 </when> | |
| 181 <when value="SelectFwe"> | |
| 182 <expand macro="feature_selection_score_function" /> | |
| 183 <section name="options" title="Other Options" expanded="True"> | |
| 184 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/> | |
| 185 </section> | |
| 186 </when> | |
| 187 <when value="RFE"> | |
| 188 <expand macro="feature_selection_estimator" /> | |
| 189 <conditional name="extra_estimator"> | |
| 190 <expand macro="feature_selection_extra_estimator" /> | |
| 191 <expand macro="feature_selection_estimator_choices" /> | |
| 192 </conditional> | |
| 193 <section name="options" title="Other Options" expanded="True"> | |
| 194 <param argument="n_features_to_select" type="integer" value="" optional="true" label="n_features_to_select" help="The number of features to select. If None, half of the features are selected." /> | |
| 195 <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " /> | |
| 196 <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." /> | |
| 197 </section> | |
| 198 </when> | |
| 199 <when value="RFECV"> | |
| 200 <expand macro="feature_selection_estimator" /> | |
| 201 <conditional name="extra_estimator"> | |
| 202 <expand macro="feature_selection_extra_estimator" /> | |
| 203 <expand macro="feature_selection_estimator_choices" /> | |
| 204 </conditional> | |
| 205 <section name="options" title="Other Options" expanded="True"> | |
| 206 <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " /> | |
| 207 <param argument="cv" type="integer" value="" optional="true" label="cv" help="Determines the cross-validation splitting strategy" /> | |
| 208 <param argument="scoring" type="text" value="" optional="true" label="scoring" help="A string (see model evaluation documentation) or a scorer callable object / function with signature scorer(estimator, X, y)."/> | |
| 209 <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." /> | |
| 210 <param argument="n_jobs" type="integer" value="1" label="n_jobs" help="Number of cores to run in parallel while fitting across folds. Defaults to 1 core."/> | |
| 211 </section> | |
| 212 </when> | |
| 213 <when value="VarianceThreshold"> | |
| 214 <section name="options" title="Options" expanded="True"> | |
| 215 <param argument="threshold" type="float" value="" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/> | |
| 216 </section> | |
| 217 </when> | |
| 218 <!--when value="chi2"> | |
| 219 </when> | |
| 220 <when value="f_classif"> | |
| 221 </when> | |
| 222 <when value="f_regression"> | |
| 223 </when> | |
| 224 <when value="mutual_info_classif"> | |
| 225 </when> | |
| 226 <when value="mutual_info_regression"> | |
| 227 </when--> | |
| 228 </conditional> | |
| 229 <expand macro="feature_selection_methods" /> | 116 <expand macro="feature_selection_methods" /> |
| 230 <expand macro="sl_mixed_input"/> | 117 <expand macro="sl_mixed_input"/> |
| 231 </inputs> | 118 </inputs> |
| 232 <outputs> | 119 <outputs> |
| 233 <data format="txt" name="outfile"/> | 120 <data format="tabular" name="outfile"/> |
| 234 </outputs> | 121 </outputs> |
| 235 <tests> | 122 <tests> |
| 236 <test> | 123 <test> |
| 237 <param name="selected_algorithm" value="SelectFromModel"/> | 124 <param name="selected_algorithm" value="SelectFromModel"/> |
| 238 <param name="has_estimator" value="no"/> | 125 <param name="has_estimator" value="no"/> |
