Mercurial > repos > bgruening > sklearn_pca
diff pca.xml @ 0:3dca4fb0b478 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 208a8d348e7c7a182cfbe1b6f17868146428a7e2"
| author | bgruening |
|---|---|
| date | Tue, 13 Apr 2021 21:15:47 +0000 |
| parents | |
| children | 6d181d3b6b4e |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pca.xml Tue Apr 13 21:15:47 2021 +0000 @@ -0,0 +1,231 @@ +<tool id="sklearn_pca" name="Principal component analysis" version="@VERSION@+galaxy@GALAXY_VERSION@" profile="20.05"> + <description>with scikit-learn</description> + <macros> + <import>main_macros.xml</import> + <token name="@GALAXY_VERSION@">0</token> + </macros> + <expand macro="python_requirements" /> + <expand macro="macro_stdio" /> + <version_command>echo "@VERSION@"</version_command> + <command detect_errors="exit_code"> + <![CDATA[ + python '$__tool_directory__/pca.py' + -i '$infile' + $header + -c '$column_selector_options.selected_column_selector_option' + #if $column_selector_options.selected_column_selector_option != 'all_columns' + -ci '$column_selector_options.col1' + #end if + #if $select_pca_type.number != '' + -n '$select_pca_type.number' + #end if + -t '$select_pca_type.select_pca_opts' + #if $select_pca_type.select_pca_opts == 'classical' + -s '$select_pca_type.select_solver_type.svd_solver_opts' + #if $select_pca_type.select_solver_type.svd_solver_opts == 'arpack' + -tol $select_pca_type.select_solver_type.tolerance + #end if + $select_pca_type.whiten + #elif $select_pca_type.select_pca_opts == 'incremental' + #if $select_pca_type.batch_size != '' + -b '$select_pca_type.batch_size' + #end if + $select_pca_type.whiten + #elif $select_pca_type.select_pca_opts == 'kernel' + -k '$select_pca_type.select_kernel_opts.kernel_opts' + #if $select_pca_type.select_kernel_opts.kernel_opts == 'poly' + #if $select_pca_type.select_kernel_opts.gamma != '' + -g '$select_pca_type.select_kernel_opts.gamma' + #end if + -d '$select_pca_type.select_kernel_opts.degree' + -cf '$select_pca_type.select_kernel_opts.coef0' + #elif $select_pca_type.select_kernel_opts.kernel_opts == 'rbf' + #if $select_pca_type.select_kernel_opts.gamma != '' + -g '$select_pca_type.select_kernel_opts.gamma' + #end if + #elif $select_pca_type.select_kernel_opts.kernel_opts == 'sigmoid' + #if $select_pca_type.select_kernel_opts.gamma != '' + -g '$select_pca_type.select_kernel_opts.gamma' + #end if + -cf '$select_pca_type.select_kernel_opts.coef0' + #end if + -e '$select_pca_type.select_solver_type.eigen_solver_opts' + #if $select_pca_type.select_solver_type.eigen_solver_opts == 'arpack' + -tol $select_pca_type.select_solver_type.tolerance + #if $select_pca_type.select_solver_type.max_iter != '' + -mi $select_pca_type.select_solver_type.max_iter + #end if + #end if + #end if + -o '$outfile' + ]]> + </command> + <inputs> + <param name="infile" type="data" format="tabular" label="Input file" /> + <param name="header" type="boolean" label="Exclude Header" truevalue="--header" falsevalue="" help="If present, exclude the header row from the dataset" /> + <conditional name="column_selector_options"> + <expand macro="samples_column_selector_options" multiple="true" column_option="selected_column_selector_option" col_name="col1" infile="infile" /> + </conditional> + <conditional name="select_pca_type"> + <param name="select_pca_opts" type="select" label="Select PCA Type" help="Choose which flavour of PCA to use"> + <option value="classical" selected="true">Classical PCA</option> + <option value="incremental">Incremental PCA</option> + <option value="kernel">Kernel PCA</option> + </param> + <when value="classical"> + <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept" /> + <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" help="Setting this option will reduce the redundancy and correlations between the features" /> + <conditional name="select_solver_type"> + <param name="svd_solver_opts" type="select" label="SVD Solver" help="Method to perform the singular value decomposition"> + <option value="auto" selected="true">auto</option> + <option value="full">full</option> + <option value="arpack">arpack</option> + <option value="randomized">randomized</option> + </param> + <when value="arpack"> + <param name="tolerance" type="float" value="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack" /> + </when> + <when value="auto" /> + <when value="full" /> + <when value="randomized" /> + </conditional> + </when> + <when value="incremental"> + <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept" /> + <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" /> + <param name="batch_size" type="integer" optional="true" label="Batch Size" help="The number of samples to use for each batch" /> + </when> + <when value="kernel"> + <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept" /> + <conditional name="select_kernel_opts"> + <param name="kernel_opts" type="select" label="Kernel Type"> + <option value="linear" selected="true">linear</option> + <option value="poly">poly</option> + <option value="rbf">rbf</option> + <option value="sigmoid">sigmoid</option> + <option value="cosine">cosine</option> + <option value="precomputed">precomputed</option> + </param> + <when value="poly"> + <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels" /> + <param name="degree" type="integer" value="3" label="Degree of the polynomial" help="Degree for poly kernels. Ignored by other kernels" /> + <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels" /> + </when> + <when value="sigmoid"> + <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels" /> + <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels" /> + </when> + <when value="rbf"> + <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels" /> + </when> + <when value="linear" /> + <when value="cosine" /> + <when value="precomputed" /> + </conditional> + <conditional name="select_solver_type"> + <param name="eigen_solver_opts" type="select" label="Eigen Solver"> + <option value="auto" selected="true">auto</option> + <option value="dense">dense</option> + <option value="arpack">arpack</option> + </param> + <when value="arpack"> + <param name="tolerance" type="float" value="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack" /> + <param name="max_iter" type="integer" optional="true" label="Maximum Iterations" help="Maximum number of iterations for arpack" /> + </when> + <when value="auto" /> + <when value="dense" /> + </conditional> + </when> + </conditional> + </inputs> + <outputs> + <data format="tabular" name="outfile" /> + </outputs> + <tests> + <test> + <param name="infile" value="pca_input.dat" ftype="tabular" /> + <param name="selected_column_selector_option" value="by_index_number" /> + <param name="col1" value="1,2,4,6,8,5" /> + <param name="number" value="5" /> + <param name="select_pca_opts" value="classical" /> + <param name="svd_solver_opts" value="arpack" /> + <param name="tolerance" value="0.4" /> + <output name="outfile" ftype='tabular' file="pca_classical_output.dat" /> + </test> + <test> + <param name="infile" value="pca_input_with_headers.dat" ftype="tabular" /> + <param name="header" value="--header" /> + <param name="selected_column_selector_option" value="by_header_name" /> + <param name="col1" value="col_1,col_2,col_4,col_6,col_8,col_5" /> + <param name="number" value="5" /> + <param name="select_pca_opts" value="classical" /> + <param name="svd_solver_opts" value="arpack" /> + <param name="tolerance" value="0.4" /> + <output name="outfile" ftype='tabular' file="pca_classical_header_names_output.dat" /> + </test> + <test> + <param name="infile" value="pca_input.dat" ftype="tabular" /> + <param name="selected_column_selector_option" value="all_but_by_index_number" /> + <param name="col1" value="8,5" /> + <param name="number" value="7" /> + <param name="select_pca_opts" value="incremental" /> + <param name="batch_size" value="64" /> + <output name="outfile" ftype='tabular' file="pca_incremental_output.dat" /> + </test> + <test> + <param name="infile" value="pca_input_with_headers.dat" ftype="tabular" /> + <param name="header" value="--header" /> + <param name="selected_column_selector_option" value="all_but_by_header_name" /> + <param name="col1" value="col_8,col_5" /> + <param name="number" value="7" /> + <param name="select_pca_opts" value="incremental" /> + <param name="batch_size" value="64" /> + <output name="outfile" ftype='tabular' file="pca_incremental_header_names_output.dat" /> + </test> + <test> + <param name="infile" value="pca_input.dat" ftype="tabular" /> + <param name="selected_column_selector_option" value="all_columns" /> + <param name="number" value="8" /> + <param name="select_pca_opts" value="kernel" /> + <param name="kernel_opts" value="linear" /> + <param name="eigen_solver_opts" value="arpack" /> + <param name="tolerance" value="4.3" /> + <param name="max_iter" value="8" /> + <output name="outfile" ftype="tabular"> + <assert_contents> + <has_n_lines n="300" /> + <has_n_columns n="8" /> + </assert_contents> + </output> + </test> + <test> + <param name="infile" value="pca_input.dat" ftype="tabular" /> + <param name="selected_column_selector_option" value="all_columns" /> + <param name="number" value="8" /> + <param name="select_pca_opts" value="kernel" /> + <param name="kernel_opts" value="poly" /> + <param name="gamma" value="0.3" /> + <param name="degree" value="4" /> + <param name="coef0" value="1.6" /> + <param name="eigen_solver_opts" value="auto" /> + <output name="outfile" ftype="tabular"> + <assert_contents> + <has_n_lines n="300" /> + <has_n_columns n="8" /> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +.. class:: infomark + +**What it does** + +This tool takes a tabular input file (one data point per row, each column a variable) +and performs principal component analysis (PCA) using Singular Value Decomposition, +returning an equally sized tabular file with the first PC in the first column, second +PC in the second column, etc. + ]]> + </help> + <expand macro="sklearn_citation" /> +</tool>
