sklearn_feature_selection: feature_selection.xml comparison

comparison feature_selection.xml @ 20:547fb1cde4cc draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty

author	bgruening
date	Fri, 09 Aug 2019 06:35:08 -0400
parents	15d8ba35c23c
children	bfdf732091c1

comparison

Legend: equal, deleted, inserted, replaced

-:134fe32e3701
+:547fb1cde4cc
-<tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@.1">
+<tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@">
 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description>
 <macros>
 <import>main_macros.xml</import>
 </macros>
 <expand macro="python_requirements"/>
 model_selection, naive_bayes, neighbors, pipeline, preprocessing,
 svm, linear_model, tree, discriminant_analysis)
 from imblearn.pipeline import Pipeline as imbPipeline
 from sklearn.pipeline import Pipeline
-sys.path.insert(0, '$__tool_directory__')
+from galaxy_ml.utils import (SafeEval, feature_selector,
-from utils import SafeEval, feature_selector, read_columns
+read_columns, get_module)
 warnings.simplefilter('ignore')
 safe_eval = SafeEval()
 '$fs_algorithm_selector.options.cv_selector.groups_selector.infile_g'
 #end if
 ## Read features
 features_has_header = params['input_options']['header1']
-input_type = params['input_options']['selected_input']
+#if $input_options.selected_input == 'tabular'
-if input_type == 'tabular':
+header = 'infer' if features_has_header else None
-header = 'infer' if features_has_header else None
+column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option']
-column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option']
+if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
-if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
+c = params['input_options']['column_selector_options_1']['col1']
-c = params['input_options']['column_selector_options_1']['col1']
-else:
-c = None
-X, input_df = read_columns(
-'$input_options.infile1',
-c = c,
-c_option = column_option,
-return_df = True,
-sep='\t',
-header=header,
-parse_dates=True)
-X = X.astype(float)
 else:
-X = mmread('$input_options.infile1')
+c = None
+X, input_df = read_columns(
+'$input_options.infile1',
+c = c,
+c_option = column_option,
+return_df = True,
+sep='\t',
+header=header,
+parse_dates=True)
+X = X.astype(float)
+#elif $input_options.selected_input == 'seq_fasta'
+fasta_file = '$input_options.fasta_file'
+pyfaidx = get_module('pyfaidx')
+sequences = pyfaidx.Fasta(fasta_file)
+n_seqs = len(sequences.keys())
+X = np.arange(n_seqs)[:, np.newaxis]
+for param in estimator_params.keys():
+if param.endswith('fasta_path'):
+estimator.set_params(
+**{param: fasta_file})
+else:
+raise ValueError(
+"The selected estimator doesn't support "
+"fasta file input! Please consider using "
+"KerasGBatchClassifier with "
+"FastaDNABatchGenerator/FastaProteinBatchGenerator "
+"or having GenomeOneHotEncoder/ProteinOneHotEncoder "
+"in pipeline!")
+#elif $input_options.selected_input == 'sparse'
+X = mmread('$input_options.infile1')
+#end if
 ## Read labels
 header = 'infer' if params['input_options']['header2'] else None
 column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2']
 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:

Mercurial > repos > bgruening > sklearn_feature_selection

comparison feature_selection.xml @ 20:547fb1cde4cc draft