Mercurial > repos > bgruening > sklearn_build_pipeline
comparison pipeline.xml @ 10:0c2586a48d0f draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
| author | bgruening |
|---|---|
| date | Fri, 09 Aug 2019 06:27:28 -0400 |
| parents | 0be40b86763f |
| children | a7a047cf36d8 |
comparison
equal
deleted
inserted
replaced
| 9:ad2a1fd3431f | 10:0c2586a48d0f |
|---|---|
| 15 <inputs name="inputs" /> | 15 <inputs name="inputs" /> |
| 16 <configfile name="sklearn_pipeline_script"> | 16 <configfile name="sklearn_pipeline_script"> |
| 17 <![CDATA[ | 17 <![CDATA[ |
| 18 import imblearn | 18 import imblearn |
| 19 import json | 19 import json |
| | 20 import pandas as pd |
| 20 import pickle | 21 import pickle |
| 21 import pprint | 22 import pprint |
| 22 import skrebate | 23 import skrebate |
| 23 import sys | 24 import sys |
| 24 import warnings | 25 import warnings |
| 25 from mlxtend import classifier, regressor | |
| 26 from sklearn import ( | 26 from sklearn import ( |
| 27 cluster, compose, decomposition, ensemble, feature_extraction, | 27 cluster, compose, decomposition, ensemble, feature_extraction, |
| 28 feature_selection, gaussian_process, kernel_approximation, metrics, | 28 feature_selection, gaussian_process, kernel_approximation, metrics, |
| 29 model_selection, naive_bayes, neighbors, pipeline, preprocessing, | 29 model_selection, naive_bayes, neighbors, pipeline, preprocessing, |
| 30 svm, linear_model, tree, discriminant_analysis) | 30 svm, linear_model, tree, discriminant_analysis) |
| 31 from sklearn.pipeline import make_pipeline | 31 from sklearn.pipeline import make_pipeline |
| 32 from imblearn.pipeline import make_pipeline as imb_make_pipeline | 32 from imblearn.pipeline import make_pipeline as imb_make_pipeline |
| 33 | 33 from galaxy_ml.utils import (SafeEval, feature_selector, get_estimator, |
| 34 sys.path.insert(0, '$__tool_directory__') | 34 try_get_attr, get_search_params) |
| 35 | 35 |
| 36 from utils import SafeEval, feature_selector, get_estimator, try_get_attr | |
| 37 from preprocessors import Z_RandomOverSampler | |
| 38 | 36 |
| 39 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) | 37 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) |
| 40 | 38 |
| 41 warnings.filterwarnings('ignore') | 39 warnings.filterwarnings('ignore') |
| 42 | 40 |
| 67 sys.exit("The pre-processing component type can't be None " | 65 sys.exit("The pre-processing component type can't be None " |
| 68 "when the number of components is greater than 1.") | 66 "when the number of components is greater than 1.") |
| 69 if input_json['component_type'] == 'pre_processor': | 67 if input_json['component_type'] == 'pre_processor': |
| 70 preprocessor = input_json['pre_processors']['selected_pre_processor'] | 68 preprocessor = input_json['pre_processors']['selected_pre_processor'] |
| 71 pre_processor_options = input_json['pre_processors']['options'] | 69 pre_processor_options = input_json['pre_processors']['options'] |
| | 70 if 'feature_range' in pre_processor_options: |
| | 71 feature_range = safe_eval(pre_processor_options['feature_range'].strip()) |
| | 72 if not feature_range: |
| | 73 feature_range = (0, 1) |
| | 74 pre_processor_options['feature_range'] = feature_range |
| 72 my_class = getattr(preprocessing, preprocessor) | 75 my_class = getattr(preprocessing, preprocessor) |
| 73 obj = my_class(**pre_processor_options) | 76 obj = my_class(**pre_processor_options) |
| 74 elif input_json['component_type'] == 'feature_selection': | 77 elif input_json['component_type'] == 'feature_selection': |
| 75 obj = feature_selector(input_json['fs_algorithm_selector']) | 78 obj = feature_selector(input_json['fs_algorithm_selector']) |
| 76 elif input_json['component_type'] == 'decomposition': | 79 elif input_json['component_type'] == 'decomposition': |
| 108 is_imblearn = True | 111 is_imblearn = True |
| 109 algorithm = input_json['imblearn_selector']['select_algorithm'] | 112 algorithm = input_json['imblearn_selector']['select_algorithm'] |
| 110 if algorithm == 'over_sampling.SMOTENC': | 113 if algorithm == 'over_sampling.SMOTENC': |
| 111 obj = over_sampling.SMOTENC(categorical_features=[]) | 114 obj = over_sampling.SMOTENC(categorical_features=[]) |
| 112 elif algorithm == 'Z_RandomOverSampler': | 115 elif algorithm == 'Z_RandomOverSampler': |
| | 116 Z_RandomOverSampler = try_get_attr('galaxy_ml.preprocessors', |
| | 117 'Z_RandomOverSampler') |
| 113 obj = Z_RandomOverSampler() | 118 obj = Z_RandomOverSampler() |
| 114 else: | 119 else: |
| 115 globals = algorithm.split('.') | 120 globals = algorithm.split('.') |
| 116 mod, klass = globals[0], globals[1] | 121 mod, klass = globals[0], globals[1] |
| 117 obj = getattr(getattr(imblearn, mod), klass)() | 122 obj = getattr(getattr(imblearn, mod), klass)() |
| 118 options = input_json['imblearn_selector']['text_params'].strip() | 123 options = input_json['imblearn_selector']['text_params'].strip() |
| 119 if options != '': | 124 if options != '': |
| 120 options = safe_eval( 'dict(' + options + ')' ) | 125 options = safe_eval( 'dict(' + options + ')' ) |
| 121 obj.set_params(**options) | 126 obj.set_params(**options) |
| 122 elif input_json['component_type'] == 'IRAPS': | 127 elif input_json['component_type'] == 'IRAPS': |
| 123 iraps_core = try_get_attr('iraps_classifier','IRAPSCore')() | 128 iraps_core = try_get_attr('galaxy_ml.iraps_classifier','IRAPSCore')() |
| 124 core_params = input_json['text_params'].strip() | 129 core_params = input_json['text_params'].strip() |
| 125 if core_params != '': | 130 if core_params != '': |
| 126 try: | 131 try: |
| 127 params = safe_eval('dict(' + core_params + ')') | 132 params = safe_eval('dict(' + core_params + ')') |
| 128 except ValueError: | 133 except ValueError: |
| 135 options['fc_thres'] = input_json['fc_thres'] | 140 options['fc_thres'] = input_json['fc_thres'] |
| 136 if input_json['occurrence'] is not None: | 141 if input_json['occurrence'] is not None: |
| 137 options['occurrence'] = input_json['occurrence'] | 142 options['occurrence'] = input_json['occurrence'] |
| 138 if input_json['discretize'] is not None: | 143 if input_json['discretize'] is not None: |
| 139 options['discretize'] = input_json['discretize'] | 144 options['discretize'] = input_json['discretize'] |
| 140 IRAPSClassifier = try_get_attr('iraps_classifier','IRAPSClassifier') | 145 IRAPSClassifier = try_get_attr('galaxy_ml.iraps_classifier','IRAPSClassifier') |
| 141 obj = IRAPSClassifier(iraps_core, **options) | 146 obj = IRAPSClassifier(iraps_core, **options) |
| | 147 elif input_json['component_type'] == 'preprocessors': |
| | 148 encoder_selection = input_json['encoder_selection'] |
| | 149 encoder_type = encoder_selection.pop('encoder_type') |
| | 150 klass = try_get_attr('galaxy_ml.preprocessors', encoder_type) |
| | 151 obj = klass(**encoder_selection) |
| | 152 |
| 142 if 'n_jobs' in obj.get_params(): | 153 if 'n_jobs' in obj.get_params(): |
| 143 obj.set_params( n_jobs=N_JOBS ) | 154 obj.set_params( n_jobs=N_JOBS ) |
| 144 return obj, is_imblearn | 155 return obj, is_imblearn |
| 145 | 156 |
| 146 has_imblearn = False | 157 has_imblearn = False |
| 170 #if $output_type == 'Final_Estimator_Builder': | 181 #if $output_type == 'Final_Estimator_Builder': |
| 171 with open('$outfile', 'wb') as out_handler: | 182 with open('$outfile', 'wb') as out_handler: |
| 172 final_est = pipeline_steps[-1] | 183 final_est = pipeline_steps[-1] |
| 173 print(final_est) | 184 print(final_est) |
| 174 pickle.dump(final_est, out_handler, pickle.HIGHEST_PROTOCOL) | 185 pickle.dump(final_est, out_handler, pickle.HIGHEST_PROTOCOL) |
| | 186 out_obj = final_est |
| 175 #else: | 187 #else: |
| 176 if has_imblearn: | 188 if has_imblearn: |
| 177 pipeline = imb_make_pipeline(*pipeline_steps) | 189 pipeline = imb_make_pipeline(*pipeline_steps) |
| 178 else: | 190 else: |
| 179 pipeline = make_pipeline(*pipeline_steps) | 191 pipeline = make_pipeline(*pipeline_steps) |
| 180 pprint.pprint(pipeline.named_steps) | 192 pprint.pprint(pipeline.named_steps) |
| 181 | 193 |
| 182 with open('$outfile', 'wb') as out_handler: | 194 with open('$outfile', 'wb') as out_handler: |
| 183 pickle.dump(pipeline, out_handler, pickle.HIGHEST_PROTOCOL) | 195 pickle.dump(pipeline, out_handler, pickle.HIGHEST_PROTOCOL) |
| | 196 out_obj = pipeline |
| | 197 #end if |
| | 198 |
| | 199 #if $get_params |
| | 200 results = get_search_params(out_obj) |
| | 201 df = pd.DataFrame(results, columns=['', 'Parameter', 'Value']) |
| | 202 df.to_csv('$outfile_params', sep='\t', index=False) |
| 184 #end if | 203 #end if |
| 185 ]]> | 204 ]]> |
| 186 </configfile> | 205 </configfile> |
| 187 </configfiles> | 206 </configfiles> |
| 188 <inputs> | 207 <inputs> |
| 193 <option value="pre_processor">Sklearn Preprocessor</option> | 212 <option value="pre_processor">Sklearn Preprocessor</option> |
| 194 <option value="feature_selection">Feature Selection</option> | 213 <option value="feature_selection">Feature Selection</option> |
| 195 <option value="decomposition">Matrix Decomposition</option> | 214 <option value="decomposition">Matrix Decomposition</option> |
| 196 <option value="kernel_approximation">Kernel Approximation</option> | 215 <option value="kernel_approximation">Kernel Approximation</option> |
| 197 <option value="FeatureAgglomeration">Agglomerate Features</option> | 216 <option value="FeatureAgglomeration">Agglomerate Features</option> |
| 198 <option value="skrebate">SK-rebate feature selection</option> | 217 <option value="skrebate">SK-rebate Feature Selection</option> |
| 199 <option value="imblearn">imbalanced-learn sampling</option> | 218 <option value="imblearn">Imbalanced-learn Sampling</option> |
| 200 <option value="IRAPS">IRAPS -- feature selector and classifier</option> | 219 <option value="IRAPS">IRAPS -- feature selector and classifier</option> |
| | 220 <option value="preprocessors">Bio-sequence Encoders</option> |
| 201 </param> | 221 </param> |
| 202 <when value="None"/> | 222 <when value="None"/> |
| 203 <when value="pre_processor"> | 223 <when value="pre_processor"> |
| 204 <conditional name="pre_processors"> | 224 <conditional name="pre_processors"> |
| 205 <expand macro="sparse_preprocessors_ext" /> | 225 <expand macro="sparse_preprocessors_ext" /> |
| 230 help="Default(=blank): n_iter=1000, responsive_thres=-1, resistant_thres=0, random_state=None. No double quotes"/> | 250 help="Default(=blank): n_iter=1000, responsive_thres=-1, resistant_thres=0, random_state=None. No double quotes"/> |
| 231 <param argument="p_thres" type="float" value="0.001" label="P value threshold" help="Float. default=0.001"/> | 251 <param argument="p_thres" type="float" value="0.001" label="P value threshold" help="Float. default=0.001"/> |
| 232 <param argument="fc_thres" type="float" value="0.1" label="fold change threshold" help="Float. default=0.1"/> | 252 <param argument="fc_thres" type="float" value="0.1" label="fold change threshold" help="Float. default=0.1"/> |
| 233 <param argument="occurrence" type="float" value="0.7" label="reservation factor" help="Float. default=0.7"/> | 253 <param argument="occurrence" type="float" value="0.7" label="reservation factor" help="Float. default=0.7"/> |
| 234 <param argument="discretize" type="float" value="-1" label="The z_score threshold to discretize target value" help="Float. default=-1"/> | 254 <param argument="discretize" type="float" value="-1" label="The z_score threshold to discretize target value" help="Float. default=-1"/> |
| | 255 </when> |
| | 256 <when value="preprocessors"> |
| | 257 <expand macro="preprocessors_sequence_encoders"/> |
| 235 </when> | 258 </when> |
| 236 </conditional> | 259 </conditional> |
| 237 </repeat> | 260 </repeat> |
| 238 <section name="final_estimator" title="Final Estimator" expanded="true"> | 261 <section name="final_estimator" title="Final Estimator" expanded="true"> |
| 239 <conditional name="estimator_selector"> | 262 <conditional name="estimator_selector"> |
| 264 </section> | 287 </section> |
| 265 <param name="output_type" type="select" label="Output the final estimator instead?"> | 288 <param name="output_type" type="select" label="Output the final estimator instead?"> |
| 266 <option value="Pipeline_Builder" selected="true">Pipeline</option> | 289 <option value="Pipeline_Builder" selected="true">Pipeline</option> |
| 267 <option value="Final_Estimator_Builder">Final Estimator</option> | 290 <option value="Final_Estimator_Builder">Final Estimator</option> |
| 268 </param> | 291 </param> |
| | 292 <param name="get_params" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Output parameters for searchCV?" |
| | 293 help="Optional. Tunable parameters could be obtained through `estimator_attributes` tool."/> |
| 269 </inputs> | 294 </inputs> |
| 270 <outputs> | 295 <outputs> |
| 271 <data format="zip" name="outfile" label="${output_type}"/> | 296 <data format="zip" name="outfile" label="${output_type}"/> |
| | 297 <data format="tabular" name="outfile_params" label="get_params for ${output_type}"> |
| | 298 <filter>get_params</filter> |
| | 299 </data> |
| 272 </outputs> | 300 </outputs> |
| 273 <tests> | 301 <tests> |
| 274 <test> | 302 <test> |
| 275 <repeat name="pipeline_component"> | 303 <repeat name="pipeline_component"> |
| 276 <conditional name="component_selector"> | 304 <conditional name="component_selector"> |
| 470 </conditional> | 498 </conditional> |
| 471 </section> | 499 </section> |
| 472 <param name="output_type" value="Final_Estimator_Builder"/> | 500 <param name="output_type" value="Final_Estimator_Builder"/> |
| 473 <output name="outfile" file="pipeline15" compare="sim_size" delta="5"/> | 501 <output name="outfile" file="pipeline15" compare="sim_size" delta="5"/> |
| 474 </test> | 502 </test> |
| | 503 <test> |
| | 504 <conditional name="component_selector"> |
| | 505 <param name="component_type" value="preprocessors"/> |
| | 506 <conditional name="encoder_selection"> |
| | 507 <param name="encoder_type" value="GenomeOneHotEncoder"/> |
| | 508 <param name="seq_length" value="1000"/> |
| | 509 <param name="padding" value="True"/> |
| | 510 </conditional> |
| | 511 </conditional> |
| | 512 <section name="final_estimator"> |
| | 513 <conditional name="estimator_selector"> |
| | 514 <param name="selected_module" value="custom_estimator"/> |
| | 515 <param name="c_estimator" value="keras_model02" ftype="zip"/> |
| | 516 </conditional> |
| | 517 </section> |
| | 518 <output name="outfile" file="pipeline16" compare="sim_size" delta="5"/> |
| | 519 </test> |
| 475 </tests> | 520 </tests> |
| 476 <help> | 521 <help> |
| 477 <