diff kpca.xml @ 0:5642f7ee948b draft

Imported from capsule None
author devteam
date Mon, 19 May 2014 11:00:04 -0400
parents
children 9246ab62f58f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kpca.xml	Mon May 19 11:00:04 2014 -0400
@@ -0,0 +1,144 @@
+<tool id="kpca1" name="Kernel Principal Component Analysis" version="1.0.0">
+  <description> </description>
+  <requirements>
+    <requirement type="package" version="1.0.3">rpy</requirement>
+    <requirement type="package" version="2.11.0">R</requirement>
+    <requirement type="package" version="0.1-4">kernlab</requirement>
+    <requirement type="package" version="1.7.1">numpy</requirement>
+    <requirement type="package" version="0.7.1">bx-python</requirement>
+  </requirements>
+  <command interpreter="python">
+    kpca.py 
+      --input=$input1
+      --output1=$out_file1
+      --output2=$out_file2
+      --var_cols=$var_cols
+      --kernel=$kernelChoice.kernel
+      --features=$features
+      #if $kernelChoice.kernel == "rbfdot" or $kernelChoice.kernel == "anovadot":
+      --sigma=$kernelChoice.sigma
+      --degree="None"
+      --scale="None"
+      --offset="None"
+      --order="None"
+      #elif $kernelChoice.kernel == "polydot":
+      --sigma="None"
+      --degree=$kernelChoice.degree
+      --scale=$kernelChoice.scale
+      --offset=$kernelChoice.offset
+      --order="None"
+      #elif $kernelChoice.kernel == "tanhdot":
+      --sigma="None"
+      --degree="None"
+      --scale=$kernelChoice.scale
+      --offset=$kernelChoice.offset
+      --order="None"
+      #elif $kernelChoice.kernel == "besseldot":
+      --sigma=$kernelChoice.sigma
+      --degree=$kernelChoice.degree
+      --scale="None"
+      --offset="None"
+      --order=$kernelChoice.order
+      #elif $kernelChoice.kernel == "anovadot":
+      --sigma=$kernelChoice.sigma
+      --degree=$kernelChoice.degree
+      --scale="None"
+      --offset="None"
+      --order="None"
+      #else:
+      --sigma="None"
+      --degree="None"
+      --scale="None"
+      --offset="None"
+      --order="None"
+      #end if
+  </command>
+  <inputs>
+    <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
+    <param name="var_cols" label="Select columns containing input variables " type="data_column" data_ref="input1" numerical="True" multiple="true" >
+        <validator type="no_options" message="Please select at least one column."/>
+    </param>
+    <param name="features" size="10" type="integer" value="2" label="Number of principal components to return" help="To return all, enter 0"/>
+    <conditional name="kernelChoice">
+        <param name="kernel" type="select" label="Kernel function">
+            <option value="rbfdot" selected="true">Gaussian Radial Basis Function</option>
+            <option value="polydot">Polynomial</option>
+            <option value="vanilladot">Linear</option>
+            <option value="tanhdot">Hyperbolic</option>
+            <option value="laplacedot">Laplacian</option>
+            <option value="besseldot">Bessel</option>
+            <option value="anovadot">ANOVA Radial Basis Function</option>
+            <option value="splinedot">Spline</option>
+        </param>
+        <when value="vanilladot" />
+        <when value="splinedot" />
+        <when value="rbfdot">
+            <param name="sigma" size="10" type="float" value="1" label="sigma (inverse kernel width)" />
+        </when>
+        <when value="laplacedot">
+            <param name="sigma" size="10" type="float" value="1" label="sigma (inverse kernel width)" />
+        </when>
+        <when value="polydot">
+            <param name="degree" size="10" type="integer" value="1" label="degree" />
+            <param name="scale" size="10" type="integer" value="1" label="scale" />
+            <param name="offset" size="10" type="integer" value="1" label="offset" />
+        </when>
+        <when value="tanhdot">
+            <param name="scale" size="10" type="integer" value="1" label="scale" />
+            <param name="offset" size="10" type="integer" value="1" label="offset" />
+        </when>
+        <when value="besseldot">
+            <param name="sigma" size="10" type="integer" value="1" label="sigma" />
+            <param name="order" size="10" type="integer" value="1" label="order" />
+            <param name="degree" size="10" type="integer" value="1" label="degree" />
+        </when>
+        <when value="anovadot">
+            <param name="sigma" size="10" type="integer" value="1" label="sigma" />
+            <param name="degree" size="10" type="integer" value="1" label="degree" />
+        </when>
+    </conditional>    
+  </inputs>
+  <outputs>
+    <data format="input" name="out_file1" metadata_source="input1" />
+    <data format="pdf" name="out_file2" />
+  </outputs>
+  <tests>
+    <test>
+        <param name="input1" value="iris.tabular"/>
+        <param name="var_cols" value="1,2,3,4"/>
+        <param name="kernel" value="polydot"/>
+        <param name="features" value="2"/>
+        <param name="offset" value="0"/>
+        <param name="scale" value="1"/>
+        <param name="degree" value="2"/>
+        <output name="out_file1" file="kpca_out1.tabular"/>
+        <output name="out_file2" file="kpca_out2.pdf"/>
+    </test>
+  </tests>
+  <help>
+
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Edit Datasets-&gt;Convert characters*
+
+-----
+
+.. class:: infomark
+
+**What it does**
+
+This tool uses functions from 'kernlab' library from R statistical package to perform Kernel Principal Component Analysis (kPCA) on the input data. It outputs two files, one containing the summary statistics of the performed kPCA, and the other containing a scatterplot matrix of rotated values reported by kPCA.   
+
+*Alexandros Karatzoglou, Alex Smola, Kurt Hornik, Achim Zeileis (2004). kernlab - An S4 Package for Kernel Methods in R. Journal of Statistical Software 11(9), 1-20. URL http://www.jstatsoft.org/v11/i09/*
+
+-----
+
+.. class:: warningmark
+
+**Note**
+
+This tool currently treats all variables as continuous numeric variables. Running the tool on categorical variables might result in incorrect results. Rows containing non-numeric (or missing) data in any of the chosen columns will be skipped from the analysis.
+
+  </help>
+</tool>