Mercurial > repos > bgruening > sklearn_feature_selection
comparison feature_selection.xml @ 34:b9d86fc6359d draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
| author | bgruening |
|---|---|
| date | Wed, 09 Aug 2023 11:58:39 +0000 |
| parents | 2ac77e0aec82 |
| children | |
comparison
equal
deleted
inserted
replaced
| 33:b0b584350b7e | 34:b9d86fc6359d |
|---|---|
| 1 <tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@" profile="20.05"> | 1 <tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@" profile="@PROFILE@"> |
| 2 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description> | 2 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description> |
| 3 <macros> | 3 <macros> |
| 4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
| 5 </macros> | 5 </macros> |
| 6 <expand macro="python_requirements" /> | 6 <expand macro="python_requirements" /> |
| 29 model_selection, naive_bayes, neighbors, pipeline, preprocessing, | 29 model_selection, naive_bayes, neighbors, pipeline, preprocessing, |
| 30 svm, linear_model, tree, discriminant_analysis) | 30 svm, linear_model, tree, discriminant_analysis) |
| 31 from imblearn.pipeline import Pipeline as imbPipeline | 31 from imblearn.pipeline import Pipeline as imbPipeline |
| 32 from sklearn.pipeline import Pipeline | 32 from sklearn.pipeline import Pipeline |
| 33 | 33 |
| | 34 from galaxy_ml.model_persist import dump_model_to_h5 |
| 34 from galaxy_ml.utils import (SafeEval, feature_selector, | 35 from galaxy_ml.utils import (SafeEval, feature_selector, |
| 35 read_columns, get_module) | 36 read_columns, get_module) |
| 36 | 37 |
| 37 | 38 |
| 38 warnings.simplefilter('ignore') | 39 warnings.simplefilter('ignore') |
| 78 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: | 79 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: |
| 79 c = params['input_options']['column_selector_options_1']['col1'] | 80 c = params['input_options']['column_selector_options_1']['col1'] |
| 80 else: | 81 else: |
| 81 c = None | 82 c = None |
| 82 X, input_df = read_columns( | 83 X, input_df = read_columns( |
| 83 '$input_options.infile1', | 84 '$input_options.infile1', |
| 84 c = c, | 85 c = c, |
| 85 c_option = column_option, | 86 c_option = column_option, |
| 86 return_df = True, | 87 return_df = True, |
| 87 sep='\t', | 88 sep='\t', |
| 88 header=header, | 89 header=header, |
| 89 parse_dates=True) | 90 parse_dates=True, |
| | 91 ) |
| 90 X = X.astype(float) | 92 X = X.astype(float) |
| 91 #elif $input_options.selected_input == 'seq_fasta' | 93 #elif $input_options.selected_input == 'seq_fasta' |
| 92 fasta_file = '$input_options.fasta_file' | 94 fasta_file = '$input_options.fasta_file' |
| 93 pyfaidx = get_module('pyfaidx') | 95 pyfaidx = get_module('pyfaidx') |
| 94 sequences = pyfaidx.Fasta(fasta_file) | 96 sequences = pyfaidx.Fasta(fasta_file) |
| 116 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: | 118 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: |
| 117 c = params['input_options']['column_selector_options_2']['col2'] | 119 c = params['input_options']['column_selector_options_2']['col2'] |
| 118 else: | 120 else: |
| 119 c = None | 121 c = None |
| 120 y = read_columns( | 122 y = read_columns( |
| 121 '$input_options.infile2', | 123 '$input_options.infile2', |
| 122 c = c, | 124 c = c, |
| 123 c_option = column_option, | 125 c_option = column_option, |
| 124 sep='\t', | 126 sep='\t', |
| 125 header=header, | 127 header=header, |
| 126 parse_dates=True) | 128 parse_dates=True, |
| | 129 ) |
| 127 y = y.ravel() | 130 y = y.ravel() |
| 128 | 131 |
| 129 ## Create feature selector | 132 ## Create feature selector |
| 130 new_selector = feature_selector(params['fs_algorithm_selector'], X=X, y=y) | 133 new_selector = feature_selector(params['fs_algorithm_selector'], X=X, y=y) |
| 131 if params['fs_algorithm_selector']['selected_algorithm'] != 'SelectFromModel'\ | 134 if params['fs_algorithm_selector']['selected_algorithm'] != 'SelectFromModel'\ |
| 140 selected_names = input_df.columns[new_selector.get_support(indices=True)] | 143 selected_names = input_df.columns[new_selector.get_support(indices=True)] |
| 141 res = pandas.DataFrame(res, columns = selected_names) | 144 res = pandas.DataFrame(res, columns = selected_names) |
| 142 res.to_csv(path_or_buf='$outfile', sep='\t', index=False) | 145 res.to_csv(path_or_buf='$outfile', sep='\t', index=False) |
| 143 | 146 |
| 144 #if $save: | 147 #if $save: |
| 145 with open('$outfile_selector', 'wb') as output_handler: | 148 dump_model_to_h5(new_selector, '$outfile_selector') |
| 146 pickle.dump(new_selector, output_handler, pickle.HIGHEST_PROTOCOL) | |
| 147 #end if | 149 #end if |
| 148 | 150 |
| 149 ]]> | 151 ]]> |
| 150 </configfile> | 152 </configfile> |
| 151 </configfiles> | 153 </configfiles> |
| 154 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Save the fitted selector?" /> | 156 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Save the fitted selector?" /> |
| 155 <expand macro="sl_mixed_input_plus_sequence" /> | 157 <expand macro="sl_mixed_input_plus_sequence" /> |
| 156 </inputs> | 158 </inputs> |
| 157 <outputs> | 159 <outputs> |
| 158 <data format="tabular" name="outfile" /> | 160 <data format="tabular" name="outfile" /> |
| 159 <data format="zip" name="outfile_selector" label="${fs_algorithm_selector.selected_algorithm}"> | 161 <data format="h5mlm" name="outfile_selector" label="${fs_algorithm_selector.selected_algorithm}"> |
| 160 <filter>save</filter> | 162 <filter>save</filter> |
| 161 </data> | 163 </data> |
| 162 </outputs> | 164 </outputs> |
| 163 <tests> | 165 <tests> |
| 164 <test> | 166 <test> |
| 292 <output name="outfile" file="feature_selection_result11" /> | 294 <output name="outfile" file="feature_selection_result11" /> |
| 293 </test> | 295 </test> |
| 294 <test> | 296 <test> |
| 295 <param name="selected_algorithm" value="SelectFromModel" /> | 297 <param name="selected_algorithm" value="SelectFromModel" /> |
| 296 <param name="input_mode" value="prefitted" /> | 298 <param name="input_mode" value="prefitted" /> |
| 297 <param name="fitted_estimator" value="rfr_model01" ftype="zip" /> | 299 <param name="fitted_estimator" value="searchCV03" ftype="h5mlm" /> |
| 298 <param name="infile1" value="regression_train.tabular" ftype="tabular" /> | 300 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
| 299 <param name="header1" value="false" /> | 301 <param name="header1" value="true" /> |
| 300 <param name="col1" value="1,2,3,4,5" /> | 302 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17" /> |
| 301 <param name="infile2" value="regression_train.tabular" ftype="tabular" /> | 303 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
| 302 <param name="col2" value="1" /> | 304 <param name="col2" value="1" /> |
| 303 <param name="header2" value="false" /> | 305 <param name="header2" value="true" /> |
| 304 <output name="outfile" file="feature_selection_result12" /> | 306 <output name="outfile" file="feature_selection_result12" /> |
| 305 </test> | 307 </test> |
| 306 <test> | 308 <test> |
| 307 <param name="selected_algorithm" value="RFECV" /> | 309 <param name="selected_algorithm" value="RFECV" /> |
| 308 <param name="input_mode" value="new" /> | 310 <param name="input_mode" value="new" /> |
