Mercurial > repos > bgruening > sklearn_feature_selection
comparison feature_selection.xml @ 10:d00e89558c18 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 76583c1fcd9d06a4679cc46ffaee44117b9e22cd
| author | bgruening |
|---|---|
| date | Sat, 04 Aug 2018 12:17:30 -0400 |
| parents | 7701da597d1d |
| children | 467550472f7d |
comparison
equal
deleted
inserted
replaced
| 9:7701da597d1d | 10:d00e89558c18 |
|---|---|
| 17 <![CDATA[ | 17 <![CDATA[ |
| 18 import sys | 18 import sys |
| 19 import json | 19 import json |
| 20 import pandas | 20 import pandas |
| 21 import pickle | 21 import pickle |
| 22 import ast | |
| 22 import numpy as np | 23 import numpy as np |
| 24 import xgboost | |
| 23 import sklearn.feature_selection | 25 import sklearn.feature_selection |
| 24 from sklearn import svm, linear_model, ensemble | 26 from sklearn import svm, linear_model, ensemble, naive_bayes, tree, neighbors |
| 25 | 27 |
| 26 @COLUMNS_FUNCTION@ | 28 @COLUMNS_FUNCTION@ |
| 27 | 29 @GET_ESTIMATOR_FUNCTION@ |
| 28 @FEATURE_SELECTOR_FUNCTION@ | 30 @FEATURE_SELECTOR_FUNCTION@ |
| 29 | 31 |
| 30 input_json_path = sys.argv[1] | 32 input_json_path = sys.argv[1] |
| 31 with open(input_json_path, "r") as param_handler: | 33 with open(input_json_path, "r") as param_handler: |
| 32 params = json.load(param_handler) | 34 params = json.load(param_handler) |
| 33 | 35 |
| 34 ## Read features | 36 #handle cheetah |
| 37 #if $fs_algorithm_selector.selected_algorithm == "SelectFromModel"\ | |
| 38 and $fs_algorithm_selector.model_inputter.input_mode == "prefitted": | |
| 39 params['fs_algorithm_selector']['model_inputter']['fitted_estimator'] =\ | |
| 40 "$fs_algorithm_selector.model_inputter.fitted_estimator" | |
| 41 #end if | |
| 42 | |
| 43 # Read features | |
| 35 features_has_header = params["input_options"]["header1"] | 44 features_has_header = params["input_options"]["header1"] |
| 36 input_type = params["input_options"]["selected_input"] | 45 input_type = params["input_options"]["selected_input"] |
| 37 if input_type=="tabular": | 46 if input_type=="tabular": |
| 38 header = 'infer' if features_has_header else None | 47 header = 'infer' if features_has_header else None |
| 39 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"] | 48 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"] |
| 51 parse_dates=True | 60 parse_dates=True |
| 52 ) | 61 ) |
| 53 else: | 62 else: |
| 54 X = mmread("$input_options.infile1") | 63 X = mmread("$input_options.infile1") |
| 55 | 64 |
| 56 ## Read labels | 65 # Read labels |
| 57 header = 'infer' if params["input_options"]["header2"] else None | 66 header = 'infer' if params["input_options"]["header2"] else None |
| 58 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] | 67 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] |
| 59 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: | 68 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: |
| 60 c = params["input_options"]["column_selector_options_2"]["col2"] | 69 c = params["input_options"]["column_selector_options_2"]["col2"] |
| 61 else: | 70 else: |
| 68 header=header, | 77 header=header, |
| 69 parse_dates=True | 78 parse_dates=True |
| 70 ) | 79 ) |
| 71 y=y.ravel() | 80 y=y.ravel() |
| 72 | 81 |
| 73 ## Create feature selector | 82 # Create feature selector |
| 74 new_selector = feature_selector(params['feature_selection_algorithms']) | 83 new_selector = feature_selector(params['fs_algorithm_selector']) |
| 75 if params['feature_selection_algorithms']['selected_algorithm'] != 'SelectFromModel' or \ | 84 if params['fs_algorithm_selector']['selected_algorithm'] != 'SelectFromModel'\ |
| 76 'extra_estimator' not in params['feature_selection_algorithms'] or \ | 85 or params['fs_algorithm_selector']['model_inputter']['input_mode'] != 'prefitted' : |
| 77 params['feature_selection_algorithms']['extra_estimator']['has_estimator'] != 'no_load' : | |
| 78 new_selector.fit(X, y) | 86 new_selector.fit(X, y) |
| 79 | 87 |
| 80 ## Transform to select features | 88 ## Transform to select features |
| 81 selected_names = None | 89 selected_names = None |
| 82 if "$select_methods.selected_method" == "fit_transform": | 90 if "$output_method_selector.selected_method" == "fit_transform": |
| 83 res = new_selector.transform(X) | 91 res = new_selector.transform(X) |
| 84 if features_has_header: | 92 if features_has_header: |
| 85 selected_names = input_df.columns[new_selector.get_support(indices=True)] | 93 selected_names = input_df.columns[new_selector.get_support(indices=True)] |
| 86 else: | 94 else: |
| 87 res = new_selector.get_support(params["select_methods"]["indices"]) | 95 res = new_selector.get_support(params["output_method_selector"]["indices"]) |
| 88 | 96 |
| 89 res = pandas.DataFrame(res, columns = selected_names) | 97 res = pandas.DataFrame(res, columns = selected_names) |
| 90 res.to_csv(path_or_buf="$outfile", sep='\t', index=False) | 98 res.to_csv(path_or_buf="$outfile", sep='\t', index=False) |
| 91 | 99 |
| 92 | 100 |
| 93 ]]> | 101 ]]> |
| 94 </configfile> | 102 </configfile> |
| 95 </configfiles> | 103 </configfiles> |
| 96 <inputs> | 104 <inputs> |
| 97 <expand macro="feature_selection_all" /> | 105 <expand macro="feature_selection_all"> |
| 98 <expand macro="feature_selection_methods" /> | 106 <expand macro="fs_selectfrommodel_prefitted"/> |
| 107 </expand> | |
| 108 <expand macro="feature_selection_output_mothods" /> | |
| 99 <expand macro="sl_mixed_input"/> | 109 <expand macro="sl_mixed_input"/> |
| 100 </inputs> | 110 </inputs> |
| 101 <outputs> | 111 <outputs> |
| 102 <data format="tabular" name="outfile"/> | 112 <data format="tabular" name="outfile"/> |
| 103 </outputs> | 113 </outputs> |
| 104 <tests> | 114 <tests> |
| 105 <test> | 115 <test> |
| 106 <param name="selected_algorithm" value="SelectFromModel"/> | 116 <param name="selected_algorithm" value="SelectFromModel"/> |
| 107 <param name="has_estimator" value="no"/> | 117 <param name="input_mode" value="new"/> |
| 108 <param name="new_estimator" value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)"/> | 118 <param name="selected_module" value="ensemble"/> |
| 109 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 119 <param name="selected_estimator" value="RandomForestRegressor"/> |
| 110 <param name="header1" value="True"/> | 120 <param name="text_params" value="'n_estimators': 10, 'random_state': 10"/> |
| 111 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> | 121 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> |
| 112 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 122 <param name="header1" value="false"/> |
| 113 <param name="col2" value="1"/> | 123 <param name="col1" value="1,2,3,4,5"/> |
| 114 <param name="header2" value="True"/> | 124 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> |
| 125 <param name="col2" value="6"/> | |
| 126 <param name="header2" value="false"/> | |
| 115 <output name="outfile" file="feature_selection_result01"/> | 127 <output name="outfile" file="feature_selection_result01"/> |
| 116 </test> | 128 </test> |
| 117 <test> | 129 <test> |
| 118 <param name="selected_algorithm" value="GenericUnivariateSelect"/> | 130 <param name="selected_algorithm" value="GenericUnivariateSelect"/> |
| 119 <param name="param" value="20"/> | 131 <param name="param" value="20"/> |
| 178 <param name="header2" value="True"/> | 190 <param name="header2" value="True"/> |
| 179 <output name="outfile" file="feature_selection_result07"/> | 191 <output name="outfile" file="feature_selection_result07"/> |
| 180 </test> | 192 </test> |
| 181 <test> | 193 <test> |
| 182 <param name="selected_algorithm" value="RFE"/> | 194 <param name="selected_algorithm" value="RFE"/> |
| 183 <param name="has_estimator" value="no"/> | 195 <param name="input_mode" value="new"/> |
| 184 <param name="new_estimator" value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)"/> | 196 <param name="selected_module" value="ensemble"/> |
| 185 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 197 <param name="selected_estimator" value="RandomForestRegressor"/> |
| 186 <param name="header1" value="True"/> | 198 <param name="text_params" value="'n_estimators': 10, 'random_state':10"/> |
| 187 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> | 199 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> |
| 188 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 200 <param name="header1" value="false"/> |
| 189 <param name="col2" value="1"/> | 201 <param name="col1" value="1,2,3,4,5"/> |
| 190 <param name="header2" value="True"/> | 202 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> |
| 203 <param name="col2" value="6"/> | |
| 204 <param name="header2" value="false"/> | |
| 191 <output name="outfile" file="feature_selection_result08"/> | 205 <output name="outfile" file="feature_selection_result08"/> |
| 192 </test> | 206 </test> |
| 193 <test> | 207 <test> |
| 194 <param name="selected_algorithm" value="RFECV"/> | 208 <param name="selected_algorithm" value="RFECV"/> |
| 195 <param name="has_estimator" value="no"/> | 209 <param name="input_mode" value="new"/> |
| 196 <param name="new_estimator" value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)"/> | 210 <param name="selected_module" value="ensemble"/> |
| 197 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 211 <param name="selected_estimator" value="RandomForestRegressor"/> |
| 198 <param name="header1" value="True"/> | 212 <param name="text_params" value="'n_estimators': 10, 'random_state':10"/> |
| 199 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> | 213 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> |
| 200 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 214 <param name="header1" value="false"/> |
| 201 <param name="col2" value="1"/> | 215 <param name="col1" value="1,2,3,4,5"/> |
| 202 <param name="header2" value="True"/> | 216 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> |
| 217 <param name="col2" value="6"/> | |
| 218 <param name="header2" value="false"/> | |
| 203 <output name="outfile" file="feature_selection_result09"/> | 219 <output name="outfile" file="feature_selection_result09"/> |
| 204 </test> | 220 </test> |
| 205 <test> | 221 <test> |
| 206 <param name="selected_algorithm" value="VarianceThreshold"/> | 222 <param name="selected_algorithm" value="VarianceThreshold"/> |
| 207 <param name="threshold" value="0.1"/> | 223 <param name="threshold" value="0.1"/> |
| 223 <param name="infile2" value="test3.tabular" ftype="tabular"/> | 239 <param name="infile2" value="test3.tabular" ftype="tabular"/> |
| 224 <param name="header2" value="True"/> | 240 <param name="header2" value="True"/> |
| 225 <param name="selected_column_selector_option2" value="by_header_name"/> | 241 <param name="selected_column_selector_option2" value="by_header_name"/> |
| 226 <param name="col2" value="target"/> | 242 <param name="col2" value="target"/> |
| 227 <output name="outfile" file="feature_selection_result11"/> | 243 <output name="outfile" file="feature_selection_result11"/> |
| 244 </test> | |
| 245 <test> | |
| 246 <param name="selected_algorithm" value="SelectFromModel"/> | |
| 247 <param name="input_mode" value="prefitted"/> | |
| 248 <param name="fitted_estimator" value="rfr_model01" ftype="zip"/> | |
| 249 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> | |
| 250 <param name="header1" value="false"/> | |
| 251 <param name="col1" value="1,2,3,4,5"/> | |
| 252 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> | |
| 253 <param name="col2" value="1"/> | |
| 254 <param name="header2" value="false"/> | |
| 255 <output name="outfile" file="feature_selection_result12"/> | |
| 228 </test> | 256 </test> |
| 229 </tests> | 257 </tests> |
| 230 <help> | 258 <help> |
| 231 <