sklearn_feature_selection: feature_selection.xml comparison

comparison feature_selection.xml @ 34:b9d86fc6359d draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb

author	bgruening
date	Wed, 09 Aug 2023 11:58:39 +0000
parents	2ac77e0aec82
children

comparison

equal deleted inserted replaced

-:b0b584350b7e
+:b9d86fc6359d
-<tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@" profile="20.05">
+<tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@" profile="@PROFILE@">
 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description>
 <macros>
 <import>main_macros.xml</import>
 </macros>
 <expand macro="python_requirements" />
 model_selection, naive_bayes, neighbors, pipeline, preprocessing,
 svm, linear_model, tree, discriminant_analysis)
 from imblearn.pipeline import Pipeline as imbPipeline
 from sklearn.pipeline import Pipeline
+from galaxy_ml.model_persist import dump_model_to_h5
 from galaxy_ml.utils import (SafeEval, feature_selector,
 read_columns, get_module)
 warnings.simplefilter('ignore')
 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
 c = params['input_options']['column_selector_options_1']['col1']
 else:
 c = None
 X, input_df = read_columns(
 '$input_options.infile1',
 c = c,
 c_option = column_option,
 return_df = True,
 sep='\t',
 header=header,
-parse_dates=True)
+parse_dates=True,
+)
 X = X.astype(float)
 #elif $input_options.selected_input == 'seq_fasta'
 fasta_file = '$input_options.fasta_file'
 pyfaidx = get_module('pyfaidx')
 sequences = pyfaidx.Fasta(fasta_file)
 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
 c = params['input_options']['column_selector_options_2']['col2']
 else:
 c = None
 y = read_columns(
 '$input_options.infile2',
 c = c,
 c_option = column_option,
 sep='\t',
 header=header,
-parse_dates=True)
+parse_dates=True,
+)
 y = y.ravel()
 ## Create feature selector
 new_selector = feature_selector(params['fs_algorithm_selector'], X=X, y=y)
 if params['fs_algorithm_selector']['selected_algorithm'] != 'SelectFromModel'\
 selected_names = input_df.columns[new_selector.get_support(indices=True)]
 res = pandas.DataFrame(res, columns = selected_names)
 res.to_csv(path_or_buf='$outfile', sep='\t', index=False)
 #if $save:
-with open('$outfile_selector', 'wb') as output_handler:
+dump_model_to_h5(new_selector, '$outfile_selector')
-pickle.dump(new_selector, output_handler, pickle.HIGHEST_PROTOCOL)
 #end if
 ]]>
 </configfile>
 </configfiles>
 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Save the fitted selector?" />
 <expand macro="sl_mixed_input_plus_sequence" />
 </inputs>
 <outputs>
 <data format="tabular" name="outfile" />
-<data format="zip" name="outfile_selector" label="${fs_algorithm_selector.selected_algorithm}">
+<data format="h5mlm" name="outfile_selector" label="${fs_algorithm_selector.selected_algorithm}">
 <filter>save</filter>
 </data>
 </outputs>
 <tests>
 <test>
 <output name="outfile" file="feature_selection_result11" />
 </test>
 <test>
 <param name="selected_algorithm" value="SelectFromModel" />
 <param name="input_mode" value="prefitted" />
-<param name="fitted_estimator" value="rfr_model01" ftype="zip" />
+<param name="fitted_estimator" value="searchCV03" ftype="h5mlm" />
-<param name="infile1" value="regression_train.tabular" ftype="tabular" />
+<param name="infile1" value="regression_X.tabular" ftype="tabular" />
-<param name="header1" value="false" />
+<param name="header1" value="true" />
-<param name="col1" value="1,2,3,4,5" />
+<param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17" />
-<param name="infile2" value="regression_train.tabular" ftype="tabular" />
+<param name="infile2" value="regression_y.tabular" ftype="tabular" />
 <param name="col2" value="1" />
-<param name="header2" value="false" />
+<param name="header2" value="true" />
 <output name="outfile" file="feature_selection_result12" />
 </test>
 <test>
 <param name="selected_algorithm" value="RFECV" />
 <param name="input_mode" value="new" />

Mercurial > repos > bgruening > sklearn_feature_selection

comparison feature_selection.xml @ 34:b9d86fc6359d draft