annotate pca.xml @ 0:ffcdde989859 draft

Uploaded
author iuc
date Tue, 29 Jul 2014 06:30:45 -0400
parents
children 2e7bc1bb2dbe
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ffcdde989859 Uploaded
iuc
parents:
diff changeset
1 <tool id="pca1" name="Principal Component Analysis" version="1.1.0">
ffcdde989859 Uploaded
iuc
parents:
diff changeset
2 <description> </description>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
3 <expand macro="requirements" />
ffcdde989859 Uploaded
iuc
parents:
diff changeset
4 <macros>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
5 <import>statistic_tools_macros.xml</import>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
6 </macros>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
7 <command interpreter="python">
ffcdde989859 Uploaded
iuc
parents:
diff changeset
8 pca.py
ffcdde989859 Uploaded
iuc
parents:
diff changeset
9 $input1
ffcdde989859 Uploaded
iuc
parents:
diff changeset
10 $var_cols
ffcdde989859 Uploaded
iuc
parents:
diff changeset
11 $methodChoice.method
ffcdde989859 Uploaded
iuc
parents:
diff changeset
12 $out_file1
ffcdde989859 Uploaded
iuc
parents:
diff changeset
13 $out_file2
ffcdde989859 Uploaded
iuc
parents:
diff changeset
14 #if $methodChoice.method == "svd":
ffcdde989859 Uploaded
iuc
parents:
diff changeset
15 $methodChoice.scale
ffcdde989859 Uploaded
iuc
parents:
diff changeset
16 #end if
ffcdde989859 Uploaded
iuc
parents:
diff changeset
17 </command>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
18 <inputs>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
19 <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
20 <param name="var_cols" label="Select columns containing input variables " type="data_column" data_ref="input1" numerical="True" multiple="true" >
ffcdde989859 Uploaded
iuc
parents:
diff changeset
21 <validator type="no_options" message="Please select at least one column."/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
22 </param>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
23 <conditional name="methodChoice">
ffcdde989859 Uploaded
iuc
parents:
diff changeset
24 <param name="method" type="select" label="Method" help="The correlation matrix can only be used if there are no constant variables">
ffcdde989859 Uploaded
iuc
parents:
diff changeset
25 <option value="cor" selected="true">Eigenvectors of Correlation (princomp)</option>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
26 <option value="cov">Eigenvectors of Covariance (princomp)</option>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
27 <option value="svd">Singular Value Decomposition (prcomp)</option>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
28 </param>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
29 <when value="cor" />
ffcdde989859 Uploaded
iuc
parents:
diff changeset
30 <when value="cov" />
ffcdde989859 Uploaded
iuc
parents:
diff changeset
31 <when value="svd">
ffcdde989859 Uploaded
iuc
parents:
diff changeset
32 <param name="scale" type="select" label="Centering and Scaling" help="Can be used to center and/or scale variables">
ffcdde989859 Uploaded
iuc
parents:
diff changeset
33 <option value="none" selected="true">None</option>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
34 <option value="center">Center only</option>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
35 <option value="scale">Scale only</option>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
36 <option value="both">Center and Scale</option>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
37 </param>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
38 </when>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
39 </conditional>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
40 </inputs>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
41 <outputs>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
42 <data format_source="input1" name="out_file1" metadata_source="input1" /> <!-- format_source replaces the deprecated format="input": the output takes its datatype from input1 -->
ffcdde989859 Uploaded
iuc
parents:
diff changeset
43 <data format="pdf" name="out_file2" />
ffcdde989859 Uploaded
iuc
parents:
diff changeset
44 </outputs>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
45 <tests>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
46 <test>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
47 <param name="input1" value="iris.tabular"/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
48 <param name="var_cols" value="1,2,3,4"/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
49 <param name="method" value="cor"/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
50 <output name="out_file1" file="pca_out1.tabular"/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
51 <output name="out_file2" file="pca_out2.pdf"/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
52 </test>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
53 <test>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
54 <param name="input1" value="iris.tabular"/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
55 <param name="var_cols" value="1,2,3,4"/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
56 <param name="method" value="cov"/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
57 <output name="out_file1" file="pca_out3.tabular"/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
58 <output name="out_file2" file="pca_out4.pdf"/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
59 </test>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
60 <test>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
61 <param name="input1" value="iris.tabular"/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
62 <param name="var_cols" value="1,2,3,4"/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
63 <param name="method" value="svd"/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
64 <param name="scale" value="both"/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
65 <output name="out_file1" file="pca_out5.tabular"/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
66 <output name="out_file2" file="pca_out6.pdf"/>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
67 </test>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
68 </tests>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
69 <help>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
70
ffcdde989859 Uploaded
iuc
parents:
diff changeset
71
ffcdde989859 Uploaded
iuc
parents:
diff changeset
72 .. class:: infomark
ffcdde989859 Uploaded
iuc
parents:
diff changeset
73
ffcdde989859 Uploaded
iuc
parents:
diff changeset
74 **TIP:** If your data is not TAB delimited, use *Edit Datasets-&gt;Convert characters*
ffcdde989859 Uploaded
iuc
parents:
diff changeset
75
ffcdde989859 Uploaded
iuc
parents:
diff changeset
76 -----
ffcdde989859 Uploaded
iuc
parents:
diff changeset
77
ffcdde989859 Uploaded
iuc
parents:
diff changeset
78 .. class:: infomark
ffcdde989859 Uploaded
iuc
parents:
diff changeset
79
ffcdde989859 Uploaded
iuc
parents:
diff changeset
80 **What it does**
ffcdde989859 Uploaded
iuc
parents:
diff changeset
81
ffcdde989859 Uploaded
iuc
parents:
diff changeset
82 This tool performs Principal Component Analysis on the given numeric input data using functions from the R statistical package: 'princomp' (for the eigenvector-based solution) and 'prcomp' (for the singular value decomposition-based solution). It outputs two files: one containing the summary statistics of the PCA, and the other containing biplots of the observations and principal components.
ffcdde989859 Uploaded
iuc
parents:
diff changeset
83
ffcdde989859 Uploaded
iuc
parents:
diff changeset
84 *R Development Core Team (2009). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. ISBN 3-900051-07-0, URL http://www.R-project.org.*
ffcdde989859 Uploaded
iuc
parents:
diff changeset
85
ffcdde989859 Uploaded
iuc
parents:
diff changeset
86 -----
ffcdde989859 Uploaded
iuc
parents:
diff changeset
87
ffcdde989859 Uploaded
iuc
parents:
diff changeset
88 .. class:: warningmark
ffcdde989859 Uploaded
iuc
parents:
diff changeset
89
ffcdde989859 Uploaded
iuc
parents:
diff changeset
90 **Note**
ffcdde989859 Uploaded
iuc
parents:
diff changeset
91
ffcdde989859 Uploaded
iuc
parents:
diff changeset
92 - This tool currently treats all variables as continuous numeric variables. Running the tool on categorical variables may produce incorrect results. Rows containing non-numeric (or missing) data in any of the chosen columns will be excluded from the analysis.
ffcdde989859 Uploaded
iuc
parents:
diff changeset
93
ffcdde989859 Uploaded
iuc
parents:
diff changeset
94 - The summary statistics in the output are described below:
ffcdde989859 Uploaded
iuc
parents:
diff changeset
95
ffcdde989859 Uploaded
iuc
parents:
diff changeset
96 - Std. deviation: Standard deviations of the principal components
ffcdde989859 Uploaded
iuc
parents:
diff changeset
97 - Loadings: A list of eigenvectors/variable loadings
ffcdde989859 Uploaded
iuc
parents:
diff changeset
98 - Scores: Scores of the input data on the principal components
ffcdde989859 Uploaded
iuc
parents:
diff changeset
99
ffcdde989859 Uploaded
iuc
parents:
diff changeset
100 </help>
ffcdde989859 Uploaded
iuc
parents:
diff changeset
101 </tool>