Mercurial > repos > bgruening > sklearn_feature_selection
comparison feature_selection.xml @ 20:547fb1cde4cc draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
| author | bgruening |
|---|---|
| date | Fri, 09 Aug 2019 06:35:08 -0400 |
| parents | 15d8ba35c23c |
| children | bfdf732091c1 |
comparison
equal
deleted
inserted
replaced
| 19:134fe32e3701 | 20:547fb1cde4cc |
|---|---|
| 1 <tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@.1"> | 1 <tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@"> |
| 2 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description> | 2 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description> |
| 3 <macros> | 3 <macros> |
| 4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
| 5 </macros> | 5 </macros> |
| 6 <expand macro="python_requirements"/> | 6 <expand macro="python_requirements"/> |
| 29 model_selection, naive_bayes, neighbors, pipeline, preprocessing, | 29 model_selection, naive_bayes, neighbors, pipeline, preprocessing, |
| 30 svm, linear_model, tree, discriminant_analysis) | 30 svm, linear_model, tree, discriminant_analysis) |
| 31 from imblearn.pipeline import Pipeline as imbPipeline | 31 from imblearn.pipeline import Pipeline as imbPipeline |
| 32 from sklearn.pipeline import Pipeline | 32 from sklearn.pipeline import Pipeline |
| 33 | 33 |
| 34 sys.path.insert(0, '$__tool_directory__') | 34 from galaxy_ml.utils import (SafeEval, feature_selector, |
| 35 from utils import SafeEval, feature_selector, read_columns | 35 read_columns, get_module) |
| 36 | |
| 36 | 37 |
| 37 warnings.simplefilter('ignore') | 38 warnings.simplefilter('ignore') |
| 38 | 39 |
| 39 safe_eval = SafeEval() | 40 safe_eval = SafeEval() |
| 40 | 41 |
| 69 '$fs_algorithm_selector.options.cv_selector.groups_selector.infile_g' | 70 '$fs_algorithm_selector.options.cv_selector.groups_selector.infile_g' |
| 70 #end if | 71 #end if |
| 71 | 72 |
| 72 ## Read features | 73 ## Read features |
| 73 features_has_header = params['input_options']['header1'] | 74 features_has_header = params['input_options']['header1'] |
| 74 input_type = params['input_options']['selected_input'] | 75 #if $input_options.selected_input == 'tabular' |
| 75 if input_type == 'tabular': | 76 header = 'infer' if features_has_header else None |
| 76 header = 'infer' if features_has_header else None | 77 column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option'] |
| 77 column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option'] | 78 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: |
| 78 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: | 79 c = params['input_options']['column_selector_options_1']['col1'] |
| 79 c = params['input_options']['column_selector_options_1']['col1'] | |
| 80 else: | |
| 81 c = None | |
| 82 X, input_df = read_columns( | |
| 83 '$input_options.infile1', | |
| 84 c = c, | |
| 85 c_option = column_option, | |
| 86 return_df = True, | |
| 87 sep='\t', | |
| 88 header=header, | |
| 89 parse_dates=True) | |
| 90 X = X.astype(float) | |
| 91 else: | 80 else: |
| 92 X = mmread('$input_options.infile1') | 81 c = None |
| 82 X, input_df = read_columns( | |
| 83 '$input_options.infile1', | |
| 84 c = c, | |
| 85 c_option = column_option, | |
| 86 return_df = True, | |
| 87 sep='\t', | |
| 88 header=header, | |
| 89 parse_dates=True) | |
| 90 X = X.astype(float) | |
| 91 #elif $input_options.selected_input == 'seq_fasta' | |
| 92 fasta_file = '$input_options.fasta_file' | |
| 93 pyfaidx = get_module('pyfaidx') | |
| 94 sequences = pyfaidx.Fasta(fasta_file) | |
| 95 n_seqs = len(sequences.keys()) | |
| 96 X = np.arange(n_seqs)[:, np.newaxis] | |
| 97 for param in estimator_params.keys(): | |
| 98 if param.endswith('fasta_path'): | |
| 99 estimator.set_params( | |
| 100 **{param: fasta_file}) | |
| 101 else: | |
| 102 raise ValueError( | |
| 103 "The selected estimator doesn't support " | |
| 104 "fasta file input! Please consider using " | |
| 105 "KerasGBatchClassifier with " | |
| 106 "FastaDNABatchGenerator/FastaProteinBatchGenerator " | |
| 107 "or having GenomeOneHotEncoder/ProteinOneHotEncoder " | |
| 108 "in pipeline!") | |
| 109 #elif $input_options.selected_input == 'sparse' | |
| 110 X = mmread('$input_options.infile1') | |
| 111 #end if | |
| 93 | 112 |
| 94 ## Read labels | 113 ## Read labels |
| 95 header = 'infer' if params['input_options']['header2'] else None | 114 header = 'infer' if params['input_options']['header2'] else None |
| 96 column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2'] | 115 column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2'] |
| 97 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: | 116 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: |
