Mercurial > repos > bgruening > sklearn_feature_selection
comparison feature_selection.xml @ 17:cc5b841f040b draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
| author | bgruening |
|---|---|
| date | Sun, 30 Dec 2018 01:41:30 -0500 |
| parents | 3232ad8fee41 |
| children | 15d8ba35c23c |
comparison
equal
deleted
inserted
replaced
| 16:5cfdf640dee4 | 17:cc5b841f040b |
|---|---|
| 13 </command> | 13 </command> |
| 14 <configfiles> | 14 <configfiles> |
| 15 <inputs name="inputs" /> | 15 <inputs name="inputs" /> |
| 16 <configfile name="feature_selection_script"> | 16 <configfile name="feature_selection_script"> |
| 17 <![CDATA[ | 17 <![CDATA[ |
| 18 import sys | |
| 19 import os | |
| 20 import json | 18 import json |
| 21 import pandas | |
| 22 import sklearn.feature_selection | 19 import sklearn.feature_selection |
| 23 | 20 |
| 24 with open("$__tool_directory__/sk_whitelist.json", "r") as f: | 21 with open('$__tool_directory__/sk_whitelist.json', 'r') as f: |
| 25 sk_whitelist = json.load(f) | 22 sk_whitelist = json.load(f) |
| 26 exec(open("$__tool_directory__/utils.py").read(), globals()) | 23 exec(open('$__tool_directory__/utils.py').read(), globals()) |
| 24 | |
| 25 warnings.simplefilter('ignore') | |
| 27 | 26 |
| 28 safe_eval = SafeEval() | 27 safe_eval = SafeEval() |
| 29 | 28 |
| 30 input_json_path = sys.argv[1] | 29 input_json_path = sys.argv[1] |
| 31 with open(input_json_path, "r") as param_handler: | 30 with open(input_json_path, 'r') as param_handler: |
| 32 params = json.load(param_handler) | 31 params = json.load(param_handler) |
| 33 | 32 |
| 34 #handle cheetah | 33 #handle cheetah |
| 35 #if $fs_algorithm_selector.selected_algorithm == "SelectFromModel"\ | 34 #if $fs_algorithm_selector.selected_algorithm == 'SelectFromModel'\ |
| 36 and $fs_algorithm_selector.model_inputter.input_mode == "prefitted": | 35 and $fs_algorithm_selector.model_inputter.input_mode == 'prefitted': |
| 37 params['fs_algorithm_selector']['model_inputter']['fitted_estimator'] =\ | 36 params['fs_algorithm_selector']['model_inputter']['fitted_estimator'] =\ |
| 38 "$fs_algorithm_selector.model_inputter.fitted_estimator" | 37 '$fs_algorithm_selector.model_inputter.fitted_estimator' |
| 38 #end if | |
| 39 | |
| 40 #if $fs_algorithm_selector.selected_algorithm == 'SelectFromModel'\ | |
| 41 and $fs_algorithm_selector.model_inputter.input_mode == 'new'\ | |
| 42 and $fs_algorithm_selector.model_inputter.estimator_selector.selected_module == 'customer_estimator': | |
| 43 params['fs_algorithm_selector']['model_inputter']['estimator_selector']['c_estimator'] =\ | |
| 44 '$fs_algorithm_selector.model_inputter.estimator_selector.c_estimator' | |
| 45 #end if | |
| 46 | |
| 47 #if $fs_algorithm_selector.selected_algorithm in ['RFE', 'RFECV']\ | |
| 48 and $fs_algorithm_selector.estimator_selector.selected_module == 'customer_estimator': | |
| 49 params['fs_algorithm_selector']['estimator_selector']['c_estimator'] =\ | |
| 50 '$fs_algorithm_selector.estimator_selector.c_estimator' | |
| 39 #end if | 51 #end if |
| 40 | 52 |
| 41 # Read features | 53 # Read features |
| 42 features_has_header = params["input_options"]["header1"] | 54 features_has_header = params['input_options']['header1'] |
| 43 input_type = params["input_options"]["selected_input"] | 55 input_type = params['input_options']['selected_input'] |
| 44 if input_type=="tabular": | 56 if input_type == 'tabular': |
| 45 header = 'infer' if features_has_header else None | 57 header = 'infer' if features_has_header else None |
| 46 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"] | 58 column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option'] |
| 47 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: | 59 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: |
| 48 c = params["input_options"]["column_selector_options_1"]["col1"] | 60 c = params['input_options']['column_selector_options_1']['col1'] |
| 49 else: | 61 else: |
| 50 c = None | 62 c = None |
| 51 X, input_df = read_columns( | 63 X, input_df = read_columns( |
| 52 "$input_options.infile1", | 64 '$input_options.infile1', |
| 53 c = c, | 65 c = c, |
| 54 c_option = column_option, | 66 c_option = column_option, |
| 55 return_df = True, | 67 return_df = True, |
| 56 sep='\t', | 68 sep='\t', |
| 57 header=header, | 69 header=header, |
| 58 parse_dates=True | 70 parse_dates=True |
| 59 ) | 71 ) |
| 60 else: | 72 else: |
| 61 X = mmread("$input_options.infile1") | 73 X = mmread('$input_options.infile1') |
| 62 | 74 |
| 63 # Read labels | 75 # Read labels |
| 64 header = 'infer' if params["input_options"]["header2"] else None | 76 header = 'infer' if params['input_options']['header2'] else None |
| 65 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] | 77 column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2'] |
| 66 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: | 78 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: |
| 67 c = params["input_options"]["column_selector_options_2"]["col2"] | 79 c = params['input_options']['column_selector_options_2']['col2'] |
| 68 else: | 80 else: |
| 69 c = None | 81 c = None |
| 70 y = read_columns( | 82 y = read_columns( |
| 71 "$input_options.infile2", | 83 '$input_options.infile2', |
| 72 c = c, | 84 c = c, |
| 73 c_option = column_option, | 85 c_option = column_option, |
| 74 sep='\t', | 86 sep='\t', |
| 75 header=header, | 87 header=header, |
| 76 parse_dates=True | 88 parse_dates=True |
| 83 or params['fs_algorithm_selector']['model_inputter']['input_mode'] != 'prefitted' : | 95 or params['fs_algorithm_selector']['model_inputter']['input_mode'] != 'prefitted' : |
| 84 new_selector.fit(X, y) | 96 new_selector.fit(X, y) |
| 85 | 97 |
| 86 ## Transform to select features | 98 ## Transform to select features |
| 87 selected_names = None | 99 selected_names = None |
| 88 if "$output_method_selector.selected_method" == "fit_transform": | 100 |
| 89 res = new_selector.transform(X) | 101 res = new_selector.transform(X) |
| 90 if features_has_header: | 102 if features_has_header: |
| 91 selected_names = input_df.columns[new_selector.get_support(indices=True)] | 103 selected_names = input_df.columns[new_selector.get_support(indices=True)] |
| 92 else: | |
| 93 res = new_selector.get_support(params["output_method_selector"]["indices"]) | |
| 94 | |
| 95 res = pandas.DataFrame(res, columns = selected_names) | 104 res = pandas.DataFrame(res, columns = selected_names) |
| 96 res.to_csv(path_or_buf="$outfile", sep='\t', index=False) | 105 res.to_csv(path_or_buf='$outfile', sep='\t', index=False) |
| 97 | 106 |
| 107 #if $save: | |
| 108 with open('$outfile_selector', 'wb') as output_handler: | |
| 109 pickle.dump(new_selector, output_handler, pickle.HIGHEST_PROTOCOL) | |
| 110 #end if | |
| 98 | 111 |
| 99 ]]> | 112 ]]> |
| 100 </configfile> | 113 </configfile> |
| 101 </configfiles> | 114 </configfiles> |
| 102 <inputs> | 115 <inputs> |
| 103 <expand macro="feature_selection_all"> | 116 <expand macro="feature_selection_fs"/> |
| 104 <expand macro="fs_selectfrommodel_prefitted"/> | 117 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Save the fitted selector?"/> |
| 105 </expand> | |
| 106 <expand macro="feature_selection_output_mothods" /> | |
| 107 <expand macro="sl_mixed_input"/> | 118 <expand macro="sl_mixed_input"/> |
| 108 </inputs> | 119 </inputs> |
| 109 <outputs> | 120 <outputs> |
| 110 <data format="tabular" name="outfile"/> | 121 <data format="tabular" name="outfile" /> |
| 122 <data format="zip" name="outfile_selector" label="${fs_algorithm_selector.selected_algorithm}"> | |
| 123 <filter>save</filter> | |
| 124 </data> | |
| 111 </outputs> | 125 </outputs> |
| 112 <tests> | 126 <tests> |
| 113 <test> | 127 <test> |
| 114 <param name="selected_algorithm" value="SelectFromModel"/> | 128 <param name="selected_algorithm" value="SelectFromModel"/> |
| 115 <param name="input_mode" value="new"/> | 129 <param name="input_mode" value="new"/> |
