Mercurial > repos > bgruening > sklearn_model_validation
comparison model_validation.xml @ 8:5f78d3786bfc draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit f54ff2ba2f8e7542d68966ce5a6b17d7f624ac48
| author | bgruening |
|---|---|
| date | Fri, 13 Jul 2018 03:53:15 -0400 |
| parents | 283b4baedc66 |
| children | 2593ba492276 |
comparison
equal
deleted
inserted
replaced
| 7:c6e51784e8f6 | 8:5f78d3786bfc |
|---|---|
| 20 import pandas | 20 import pandas |
| 21 import ast | 21 import ast |
| 22 import pickle | 22 import pickle |
| 23 import numpy as np | 23 import numpy as np |
| 24 import sklearn.model_selection | 24 import sklearn.model_selection |
| 25 from sklearn import svm, linear_model, ensemble | 25 from sklearn import svm, linear_model, ensemble, preprocessing |
| 26 from sklearn.pipeline import Pipeline | 26 from sklearn.pipeline import Pipeline |
| 27 | 27 |
| 28 @COLUMNS_FUNCTION@ | 28 @COLUMNS_FUNCTION@ |
| 29 | 29 |
| 30 @FEATURE_SELECTOR_FUNCTION@ | 30 @FEATURE_SELECTOR_FUNCTION@ |
| 31 | 31 |
| 32 input_json_path = sys.argv[1] | 32 input_json_path = sys.argv[1] |
| 33 params = json.load(open(input_json_path, "r")) | 33 with open(input_json_path, "r") as param_handler: |
| 34 params = json.load(param_handler) | |
| 34 | 35 |
| 35 input_type = params["input_options"]["selected_input"] | 36 input_type = params["input_options"]["selected_input"] |
| 36 if input_type=="tabular": | 37 if input_type=="tabular": |
| 37 header = 'infer' if params["input_options"]["header1"] else None | 38 header = 'infer' if params["input_options"]["header1"] else None |
| 38 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"] | 39 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"] |
| 47 sep='\t', | 48 sep='\t', |
| 48 header=header, | 49 header=header, |
| 49 parse_dates=True | 50 parse_dates=True |
| 50 ) | 51 ) |
| 51 else: | 52 else: |
| 52 X = mmread(open("$input_options.infile1", 'r')) | 53 X = mmread("$input_options.infile1") |
| 53 | 54 |
| 54 header = 'infer' if params["input_options"]["header2"] else None | 55 header = 'infer' if params["input_options"]["header2"] else None |
| 55 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] | 56 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] |
| 56 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: | 57 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: |
| 57 c = params["input_options"]["column_selector_options_2"]["col2"] | 58 c = params["input_options"]["column_selector_options_2"]["col2"] |
| 73 if 'pre_dispatch' in options and options['pre_dispatch'] == '': | 74 if 'pre_dispatch' in options and options['pre_dispatch'] == '': |
| 74 options['pre_dispatch'] = None | 75 options['pre_dispatch'] = None |
| 75 | 76 |
| 76 pipeline_steps = [] | 77 pipeline_steps = [] |
| 77 | 78 |
| 79 ## Set up pre_processor and add to pipeline steps. | |
| 80 if params['pre_processing']['do_pre_processing'] == 'Yes': | |
| 81 preprocessor = params["pre_processing"]["pre_processors"]["selected_pre_processor"] | |
| 82 pre_processor_options = params["pre_processing"]["pre_processors"]["options"] | |
| 83 my_class = getattr(preprocessing, preprocessor) | |
| 84 pipeline_steps.append( ('pre_processor', my_class(**pre_processor_options)) ) | |
| 85 | |
| 78 ## Set up feature selector and add to pipeline steps. | 86 ## Set up feature selector and add to pipeline steps. |
| 79 if params['feature_selection']['do_feature_selection'] == 'Yes': | 87 if params['feature_selection']['do_feature_selection'] == 'Yes': |
| 80 feature_selector = feature_selector(params['feature_selection']['feature_selection_algorithms']) | 88 feature_selector = feature_selector(params['feature_selection']['feature_selection_algorithms']) |
| 81 pipeline_steps.append( ('feature_selector', feature_selector)) | 89 pipeline_steps.append( ('feature_selector', feature_selector) ) |
| 82 | 90 |
| 83 ## Set up estimator and add to pipeline. | 91 ## Set up estimator and add to pipeline. |
| 84 estimator=params["model_validation_functions"]["estimator"] | 92 estimator=params["model_validation_functions"]["estimator"] |
| 85 if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == 'no': | 93 if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == 'no': |
| 86 estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"] | 94 estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"] |
| 136 | 144 |
| 137 ]]> | 145 ]]> |
| 138 </configfile> | 146 </configfile> |
| 139 </configfiles> | 147 </configfiles> |
| 140 <inputs> | 148 <inputs> |
| 149 <conditional name="pre_processing"> | |
| 150 <param name="do_pre_processing" type="select" label="Do pre_processing?"> | |
| 151 <option value="No" selected="true"/> | |
| 152 <option value="Yes"/> | |
| 153 </param> | |
| 154 <when value="No"/> | |
| 155 <when value="Yes"> | |
| 156 <conditional name="pre_processors"> | |
| 157 <expand macro="sparse_preprocessors_ext" /> | |
| 158 <expand macro="sparse_preprocessor_options_ext" /> | |
| 159 </conditional> | |
| 160 </when> | |
| 161 </conditional> | |
| 141 <conditional name="feature_selection"> | 162 <conditional name="feature_selection"> |
| 142 <param name="do_feature_selection" type="select" label="Do feature selection?"> | 163 <param name="do_feature_selection" type="select" label="Do feature selection?"> |
| 143 <option value="No" selected="true"/> | 164 <option value="No" selected="true"/> |
| 144 <option value="Yes"/> | 165 <option value="Yes"/> |
| 145 </param> | 166 </param> |
| 350 <param name="header1" value="true" /> | 371 <param name="header1" value="true" /> |
| 351 <param name="selected_column_selector_option" value="all_columns"/> | 372 <param name="selected_column_selector_option" value="all_columns"/> |
| 352 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 373 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
| 353 <param name="header2" value="true" /> | 374 <param name="header2" value="true" /> |
| 354 <param name="selected_column_selector_option2" value="all_columns"/> | 375 <param name="selected_column_selector_option2" value="all_columns"/> |
| 355 <output name="outfile" file="mv_result07.tabular"/> | 376 <output name="outfile" > |
| 377 <assert_contents> | |
| 378 <has_line line="0.7824428015300172" /> | |
| 379 </assert_contents> | |
| 380 </output> | |
| 381 </test> | |
| 382 <test> | |
| 383 <param name="do_pre_processing" value="Yes"/> | |
| 384 <param name="selected_pre_processor" value="RobustScaler"/> | |
| 385 <param name="do_feature_selection" value="Yes"/> | |
| 386 <param name="selected_algorithm" value="SelectKBest"/> | |
| 387 <param name="score_func" value="f_classif"/> | |
| 388 <param name="selected_function" value="GridSearchCV"/> | |
| 389 <param name="estimator" value="svm.SVR(kernel=&quot;linear&quot;)"/> | |
| 390 <param name="has_estimator" value="yes"/> | |
| 391 <param name="param_grid" value="[{'feature_selector__k': [3, 5, 7, 9], 'estimator__C': [1, 10, 100, 1000]}]"/> | |
| 392 <param name="return_type" value="best_score_"/> | |
| 393 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | |
| 394 <param name="header1" value="true" /> | |
| 395 <param name="selected_column_selector_option" value="all_columns"/> | |
| 396 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | |
| 397 <param name="header2" value="true" /> | |
| 398 <param name="selected_column_selector_option2" value="all_columns"/> | |
| 399 <output name="outfile" > | |
| 400 <assert_contents> | |
| 401 <has_line line="0.7938837807353147" /> | |
| 402 </assert_contents> | |
| 403 </output> | |
| 404 </test> | |
| 405 <test> | |
| 406 <param name="do_pre_processing" value="Yes"/> | |
| 407 <param name="selected_pre_processor" value="RobustScaler"/> | |
| 408 <param name="selected_function" value="GridSearchCV"/> | |
| 409 <param name="estimator" value="svm.SVR(kernel=&quot;linear&quot;)"/> | |
| 410 <param name="has_estimator" value="yes"/> | |
| 411 <param name="param_grid" value="[{'estimator__C': [1, 10, 100, 1000]}]"/> | |
| 412 <param name="return_type" value="best_score_"/> | |
| 413 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | |
| 414 <param name="header1" value="true" /> | |
| 415 <param name="selected_column_selector_option" value="all_columns"/> | |
| 416 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | |
| 417 <param name="header2" value="true" /> | |
| 418 <param name="selected_column_selector_option2" value="all_columns"/> | |
| 419 <output name="outfile" > | |
| 420 <assert_contents> | |
| 421 <has_line line="0.7904476204861263" /> | |
| 422 </assert_contents> | |
| 423 </output> | |
| 356 </test> | 424 </test> |
| 357 </tests> | 425 </tests> |
| 358 <help> | 426 <help> |
| 359 <