sklearn_build_pipeline: pipeline.xml comparison

comparison pipeline.xml @ 10:0c2586a48d0f draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty

author	bgruening
date	Fri, 09 Aug 2019 06:27:28 -0400
parents	0be40b86763f
children	a7a047cf36d8

comparison

equal deleted inserted replaced

-:ad2a1fd3431f
+:0c2586a48d0f
 <inputs name="inputs" />
 <configfile name="sklearn_pipeline_script">
 <![CDATA[
 import imblearn
 import json
+import pandas as pd
 import pickle
 import pprint
 import skrebate
 import sys
 import warnings
-from mlxtend import classifier, regressor
 from sklearn import (
 cluster, compose, decomposition, ensemble, feature_extraction,
 feature_selection, gaussian_process, kernel_approximation, metrics,
 model_selection, naive_bayes, neighbors, pipeline, preprocessing,
 svm, linear_model, tree, discriminant_analysis)
 from sklearn.pipeline import make_pipeline
 from imblearn.pipeline import make_pipeline as imb_make_pipeline
+from galaxy_ml.utils import (SafeEval, feature_selector, get_estimator,
-sys.path.insert(0, '$__tool_directory__')
+try_get_attr, get_search_params)
-from utils import SafeEval, feature_selector, get_estimator, try_get_attr
-from preprocessors import Z_RandomOverSampler
 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
 warnings.filterwarnings('ignore')
 sys.exit("The pre-processing component type can't be None "
 "when the number of components is greater than 1.")
 if input_json['component_type'] == 'pre_processor':
 preprocessor = input_json['pre_processors']['selected_pre_processor']
 pre_processor_options = input_json['pre_processors']['options']
+if 'feature_range' in pre_processor_options:
+feature_range = safe_eval(pre_processor_options['feature_range'].strip())
+if not feature_range:
+feature_range = (0, 1)
+pre_processor_options['feature_range'] = feature_range
 my_class = getattr(preprocessing, preprocessor)
 obj = my_class(**pre_processor_options)
 elif input_json['component_type'] == 'feature_selection':
 obj = feature_selector(input_json['fs_algorithm_selector'])
 elif input_json['component_type'] == 'decomposition':
 is_imblearn = True
 algorithm = input_json['imblearn_selector']['select_algorithm']
 if algorithm == 'over_sampling.SMOTENC':
 obj = over_sampling.SMOTENC(categorical_features=[])
 elif algorithm == 'Z_RandomOverSampler':
+Z_RandomOverSampler = try_get_attr('galaxy_ml.preprocessors',
+'Z_RandomOverSampler')
 obj = Z_RandomOverSampler()
 else:
 globals = algorithm.split('.')
 mod, klass = globals[0], globals[1]
 obj = getattr(getattr(imblearn, mod), klass)()
 options = input_json['imblearn_selector']['text_params'].strip()
 if options != '':
 options = safe_eval( 'dict(' + options + ')' )
 obj.set_params(**options)
 elif input_json['component_type'] == 'IRAPS':
-iraps_core = try_get_attr('iraps_classifier','IRAPSCore')()
+iraps_core = try_get_attr('galaxy_ml.iraps_classifier','IRAPSCore')()
 core_params = input_json['text_params'].strip()
 if core_params != '':
 try:
 params = safe_eval('dict(' + core_params + ')')
 except ValueError:
 options['fc_thres'] = input_json['fc_thres']
 if input_json['occurrence'] is not None:
 options['occurrence'] = input_json['occurrence']
 if input_json['discretize'] is not None:
 options['discretize'] = input_json['discretize']
-IRAPSClassifier = try_get_attr('iraps_classifier','IRAPSClassifier')
+IRAPSClassifier = try_get_attr('galaxy_ml.iraps_classifier','IRAPSClassifier')
 obj = IRAPSClassifier(iraps_core, **options)
+elif input_json['component_type'] == 'preprocessors':
+encoder_selection = input_json['encoder_selection']
+encoder_type = encoder_selection.pop('encoder_type')
+klass = try_get_attr('galaxy_ml.preprocessors', encoder_type)
+obj = klass(**encoder_selection)
 if 'n_jobs' in obj.get_params():
 obj.set_params( n_jobs=N_JOBS )
 return obj, is_imblearn
 has_imblearn = False
 #if $output_type == 'Final_Estimator_Builder':
 with open('$outfile', 'wb') as out_handler:
 final_est = pipeline_steps[-1]
 print(final_est)
 pickle.dump(final_est, out_handler, pickle.HIGHEST_PROTOCOL)
+out_obj = final_est
 #else:
 if has_imblearn:
 pipeline = imb_make_pipeline(*pipeline_steps)
 else:
 pipeline = make_pipeline(*pipeline_steps)
 pprint.pprint(pipeline.named_steps)
 with open('$outfile', 'wb') as out_handler:
 pickle.dump(pipeline, out_handler, pickle.HIGHEST_PROTOCOL)
+out_obj = pipeline
+#end if
+#if $get_params
+results = get_search_params(out_obj)
+df = pd.DataFrame(results, columns=['', 'Parameter', 'Value'])
+df.to_csv('$outfile_params', sep='\t', index=False)
 #end if
 ]]>
 </configfile>
 </configfiles>
 <inputs>
 <option value="pre_processor">Sklearn Preprocessor</option>
 <option value="feature_selection">Feature Selection</option>
 <option value="decomposition">Matrix Decomposition</option>
 <option value="kernel_approximation">Kernel Approximation</option>
 <option value="FeatureAgglomeration">Agglomerate Features</option>
-<option value="skrebate">SK-rebate feature selection</option>
+<option value="skrebate">SK-rebate Feature Selection</option>
-<option value="imblearn">imbalanced-learn sampling</option>
+<option value="imblearn">Imbalanced-learn Sampling</option>
 <option value="IRAPS">IRAPS -- feature selector and classifier</option>
+<option value="preprocessors">Bio-sequence Encoders</option>
 </param>
 <when value="None"/>
 <when value="pre_processor">
 <conditional name="pre_processors">
 <expand macro="sparse_preprocessors_ext" />
 help="Default(=blank): n_iter=1000, responsive_thres=-1, resistant_thres=0, random_state=None. No double quotes"/>
 <param argument="p_thres" type="float" value="0.001" label="P value threshold" help="Float. default=0.001"/>
 <param argument="fc_thres" type="float" value="0.1" label="fold change threshold" help="Float. default=0.1"/>
 <param argument="occurrence" type="float" value="0.7" label="reservation factor" help="Float. default=0.7"/>
 <param argument="discretize" type="float" value="-1" label="The z_score threshold to discretize target value" help="Float. default=-1"/>
+</when>
+<when value="preprocessors">
+<expand macro="preprocessors_sequence_encoders"/>
 </when>
 </conditional>
 </repeat>
 <section name="final_estimator" title="Final Estimator" expanded="true">
 <conditional name="estimator_selector">
 </section>
 <param name="output_type" type="select" label="Output the final estimator instead?">
 <option value="Pipeline_Builder" selected="true">Pipeline</option>
 <option value="Final_Estimator_Builder">Final Estimator</option>
 </param>
+<param name="get_params" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Output parameters for searchCV?"
+help="Optional. Tunable parameters could be obtained through `estimator_attributes` tool."/>
 </inputs>
 <outputs>
 <data format="zip" name="outfile" label="${output_type}"/>
+<data format="tabular" name="outfile_params" label="get_params for ${output_type}">
+<filter>get_params</filter>
+</data>
 </outputs>
 <tests>
 <test>
 <repeat name="pipeline_component">
 <conditional name="component_selector">
 </conditional>
 </section>
 <param name="output_type" value="Final_Estimator_Builder"/>
 <output name="outfile" file="pipeline15" compare="sim_size" delta="5"/>
 </test>
+<test>
+<conditional name="component_selector">
+<param name="component_type" value="preprocessors"/>
+<conditional name="encoder_selection">
+<param name="encoder_type" value="GenomeOneHotEncoder"/>
+<param name="seq_length" value="1000"/>
+<param name="padding" value="True"/>
+</conditional>
+</conditional>
+<section name="final_estimator">
+<conditional name="estimator_selector">
+<param name="selected_module" value="custom_estimator"/>
+<param name="c_estimator" value="keras_model02" ftype="zip"/>
+</conditional>
+</section>
+<output name="outfile" file="pipeline16" compare="sim_size" delta="5"/>
+</test>
 </tests>
 <help>
 <![CDATA[
 **What it does**
 Constructs a pipeline that contains a list of transforms and a final estimator. Pipeline assembles several steps

Mercurial > repos > bgruening > sklearn_build_pipeline

comparison pipeline.xml @ 10:0c2586a48d0f draft