Mercurial > repos > bgruening > sklearn_model_validation
comparison model_validation.xml @ 16:1c8109082a18 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
| author | bgruening |
|---|---|
| date | Sun, 30 Dec 2018 01:47:02 -0500 |
| parents | cc96ac126c30 |
| children | 971a2f6a790b |
comparison
equal
deleted
inserted
replaced
| 15:041cda65632b | 16:1c8109082a18 |
|---|---|
| 21 import pandas | 21 import pandas |
| 22 import numpy as np | 22 import numpy as np |
| 23 from sklearn import preprocessing, model_selection, svm, linear_model, ensemble, naive_bayes, tree, neighbors | 23 from sklearn import preprocessing, model_selection, svm, linear_model, ensemble, naive_bayes, tree, neighbors |
| 24 from sklearn.pipeline import Pipeline | 24 from sklearn.pipeline import Pipeline |
| 25 | 25 |
| 26 exec(open("$__tool_directory__/utils.py").read(), globals()) | 26 exec(open('$__tool_directory__/utils.py').read(), globals()) |
| | 27 |
| | 28 warnings.filterwarnings('ignore') |
| 27 | 29 |
| 28 safe_eval = SafeEval() | 30 safe_eval = SafeEval() |
| 29 | 31 |
| 30 input_json_path = sys.argv[1] | 32 input_json_path = sys.argv[1] |
| 31 with open(input_json_path, "r") as param_handler: | 33 with open(input_json_path, 'r') as param_handler: |
| 32 params = json.load(param_handler) | 34 params = json.load(param_handler) |
| 33 | 35 |
| 34 input_type = params["input_options"]["selected_input"] | 36 input_type = params['input_options']['selected_input'] |
| 35 if input_type=="tabular": | 37 if input_type == 'tabular': |
| 36 header = 'infer' if params["input_options"]["header1"] else None | 38 header = 'infer' if params['input_options']['header1'] else None |
| 37 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"] | 39 column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option'] |
| 38 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: | 40 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: |
| 39 c = params["input_options"]["column_selector_options_1"]["col1"] | 41 c = params['input_options']['column_selector_options_1']['col1'] |
| 40 else: | 42 else: |
| 41 c = None | 43 c = None |
| 42 X = read_columns( | 44 X = read_columns( |
| 43 "$input_options.infile1", | 45 '$input_options.infile1', |
| 44 c = c, | 46 c = c, |
| 45 c_option = column_option, | 47 c_option = column_option, |
| 46 sep='\t', | 48 sep='\t', |
| 47 header=header, | 49 header=header, |
| 48 parse_dates=True | 50 parse_dates=True |
| 49 ) | 51 ) |
| 50 else: | 52 else: |
| 51 X = mmread("$input_options.infile1") | 53 X = mmread('$input_options.infile1') |
| 52 | 54 |
| 53 header = 'infer' if params["input_options"]["header2"] else None | 55 header = 'infer' if params['input_options']['header2'] else None |
| 54 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] | 56 column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2'] |
| 55 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: | 57 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: |
| 56 c = params["input_options"]["column_selector_options_2"]["col2"] | 58 c = params['input_options']['column_selector_options_2']['col2'] |
| 57 else: | 59 else: |
| 58 c = None | 60 c = None |
| 59 y = read_columns( | 61 y = read_columns( |
| 60 "$input_options.infile2", | 62 '$input_options.infile2', |
| 61 c = c, | 63 c = c, |
| 62 c_option = column_option, | 64 c_option = column_option, |
| 63 sep='\t', | 65 sep='\t', |
| 64 header=header, | 66 header=header, |
| 65 parse_dates=True | 67 parse_dates=True |
| 66 ) | 68 ) |
| 67 y=y.ravel() | 69 y=y.ravel() |
| 68 | 70 |
| 69 options = params["model_validation_functions"]["options"] | 71 options = params['model_validation_functions']['options'] |
| 70 options['cv'] = get_cv( options['cv'] ) | 72 splitter, groups = get_cv( options.pop('cv_selector') ) |
| | 73 if groups is None: |
| | 74 options['cv'] = splitter |
| | 75 elif groups == '': |
| | 76 options['cv'] = list( splitter.split(X, y, groups=None) ) |
| | 77 else: |
| | 78 options['cv'] = list( splitter.split(X, y, groups=groups) ) |
| 71 options['n_jobs'] = N_JOBS | 79 options['n_jobs'] = N_JOBS |
| 72 if 'scoring' in options: | 80 if 'scoring' in options: |
| 73 options['scoring'] = get_scoring(options['scoring']) | 81 options['scoring'] = get_scoring(options['scoring']) |
| 74 if 'pre_dispatch' in options and options['pre_dispatch'] == '': | 82 if 'pre_dispatch' in options and options['pre_dispatch'] == '': |
| 75 options['pre_dispatch'] = None | 83 options['pre_dispatch'] = None |
| 76 | 84 |
| 77 pipeline_steps = [] | 85 pipeline_steps = [] |
| 78 | 86 |
| 79 ## Set up pre_processor and add to pipeline steps. | 87 ## Set up pre_processor and add to pipeline steps. |
| 80 if params['pre_processing']['do_pre_processing'] == 'Yes': | 88 if params['pre_processing']['do_pre_processing'] == 'Yes': |
| 81 preprocessor = params["pre_processing"]["pre_processors"]["selected_pre_processor"] | 89 preprocessor = params['pre_processing']['pre_processors']['selected_pre_processor'] |
| 82 pre_processor_options = params["pre_processing"]["pre_processors"]["options"] | 90 pre_processor_options = params['pre_processing']['pre_processors']['options'] |
| 83 my_class = getattr(preprocessing, preprocessor) | 91 my_class = getattr(preprocessing, preprocessor) |
| 84 pipeline_steps.append( ('pre_processor', my_class(**pre_processor_options)) ) | 92 pipeline_steps.append( ('pre_processor', my_class(**pre_processor_options)) ) |
| 85 | 93 |
| 86 ## Set up feature selector and add to pipeline steps. | 94 ## Set up feature selector and add to pipeline steps. |
| 87 if params['feature_selection']['do_feature_selection'] == 'Yes': | 95 if params['feature_selection']['do_feature_selection'] == 'Yes': |
| 88 feature_selector = feature_selector(params['feature_selection']['fs_algorithm_selector']) | 96 feature_selector = feature_selector(params['feature_selection']['fs_algorithm_selector']) |
| 89 pipeline_steps.append( ('feature_selector', feature_selector) ) | 97 pipeline_steps.append( ('feature_selector', feature_selector) ) |
| 90 | 98 |
| 91 ## Set up estimator and add to pipeline. | 99 ## Set up estimator and add to pipeline. |
| 92 estimator_json = params["model_validation_functions"]['estimator_selector'] | 100 estimator_json = params['model_validation_functions']['estimator_selector'] |
| 93 estimator = get_estimator(estimator_json) | 101 estimator = get_estimator(estimator_json) |
| 94 | 102 |
| 95 pipeline_steps.append( ('estimator', estimator) ) | 103 pipeline_steps.append( ('estimator', estimator) ) |
| 96 | 104 |
| 97 pipeline = Pipeline(pipeline_steps) | 105 pipeline = Pipeline(pipeline_steps) |
| 98 | 106 |
| 99 ## Set up validator, run pipeline through validator and return results. | 107 ## Set up validator, run pipeline through validator and return results. |
| 100 | 108 |
| 101 validator = params["model_validation_functions"]["selected_function"] | 109 validator = params['model_validation_functions']['selected_function'] |
| 102 validator = getattr(model_selection, validator) | 110 validator = getattr(model_selection, validator) |
| 103 | 111 |
| 104 selected_function = params["model_validation_functions"]["selected_function"] | 112 selected_function = params['model_validation_functions']['selected_function'] |
| 105 rval_type = params["model_validation_functions"].get("return_type", None) | 113 rval_type = params['model_validation_functions'].get('return_type', None) |
| 106 | 114 |
| 107 if selected_function == 'cross_validate': | 115 if selected_function == 'cross_validate': |
| 108 res = validator(pipeline, X, y, **options) | 116 res = validator(pipeline, X, y, **options) |
| 109 rval = res[rval_type] | 117 rval = res[rval_type] |
| 110 elif selected_function == 'learning_curve': | 118 elif selected_function == 'learning_curve': |
| 112 train_sizes_abs, train_scores, test_scores = validator(pipeline, X, y, **options) | 120 train_sizes_abs, train_scores, test_scores = validator(pipeline, X, y, **options) |
| 113 rval = eval(rval_type) | 121 rval = eval(rval_type) |
| 114 elif selected_function == 'permutation_test_score': | 122 elif selected_function == 'permutation_test_score': |
| 115 score, permutation_scores, pvalue = validator(pipeline, X, y, **options) | 123 score, permutation_scores, pvalue = validator(pipeline, X, y, **options) |
| 116 rval = eval(rval_type) | 124 rval = eval(rval_type) |
| 117 if rval_type in ["score", "pvalue"]: | 125 if rval_type in ['score', 'pvalue']: |
| 118 rval = [rval] | 126 rval = [rval] |
| 119 elif selected_function == 'validation_curve': | 127 elif selected_function == 'validation_curve': |
| 120 options['param_name'] = 'estimator__' + options['param_name'] | 128 options['param_name'] = 'estimator__' + options['param_name'] |
| 121 options['param_range'] = eval(options['param_range']) | 129 options['param_range'] = eval(options['param_range']) |
| 122 train_scores, test_scores = validator(pipeline, X, y, **options) | 130 train_scores, test_scores = validator(pipeline, X, y, **options) |
| 123 rval = eval(rval_type) | 131 rval = eval(rval_type) |
| 124 else: | 132 else: |
| 125 rval = validator(pipeline, X, y, **options) | 133 rval = validator(pipeline, X, y, **options) |
| 126 | 134 |
| 127 rval = pandas.DataFrame(rval) | 135 rval = pandas.DataFrame(rval) |
| 128 rval.to_csv(path_or_buf="$outfile", sep='\t', header=False, index=False) | 136 rval.to_csv(path_or_buf='$outfile', sep='\t', header=False, index=False) |
| 129 | 137 |
| 130 ]]> | 138 ]]> |
| 131 </configfile> | 139 </configfile> |
| 132 </configfiles> | 140 </configfiles> |
| 133 <inputs> | 141 <inputs> |
| 149 <option value="No" selected="true"/> | 157 <option value="No" selected="true"/> |
| 150 <option value="Yes"/> | 158 <option value="Yes"/> |
| 151 </param> | 159 </param> |
| 152 <when value="No"/> | 160 <when value="No"/> |
| 153 <when value="Yes"> | 161 <when value="Yes"> |
| 154 <expand macro="feature_selection_all"> | 162 <expand macro="feature_selection_pipeline"/> |
| 155 <expand macro="fs_selectfrommodel_no_prefitted"/> | |
| 156 </expand> | |
| 157 </when> | 163 </when> |
| 158 </conditional> | 164 </conditional> |
| 159 <conditional name="model_validation_functions"> | 165 <conditional name="model_validation_functions"> |
| 160 <param name="selected_function" type="select" label="Select a model validation function"> | 166 <param name="selected_function" type="select" label="Select a model validation function"> |
| 161 <option value="cross_validate">cross_validate - Evaluate metric(s) by cross-validation and also record fit/score times</option> | 167 <option value="cross_validate">cross_validate - Evaluate metric(s) by cross-validation and also record fit/score times</option> |
