comparison graphembed.xml @ 0:388a17402cca draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/graphembed/ commit fa8fcfdd990ab21b053c3a209018a21c89f6a82b"
author iuc
date Tue, 10 Dec 2019 16:40:04 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:388a17402cca
1 <tool id="graphembed" name="GraphEmbed" version="@VERSION@.0" >
2 <description>Compute and plot a 2D embedding of a data matrix given supervised class information</description>
3 <macros> <!-- @VERSION@ is reused by the tool's version attribute and by the graph_embed requirement version. -->
4 <token name="@VERSION@">2.4</token>
5 </macros>
6 <requirements> <!-- Single package dependency, pinned to the @VERSION@ token. -->
7 <requirement type="package" version="@VERSION@">graph_embed</requirement>
8 </requirements>
9 <version_command><![CDATA[
10 graphembed --version | cut -d'v' -f 2
11 ]]></version_command> <!-- Reports the second 'v'-delimited field of the graphembed version output. -->
12 <command detect_errors='exit_code'><![CDATA[
13 ## Run graphembed on the input matrix and class file; results are written to the job working directory.
14 graphembed
15 -i '$i'
16 -t '$t'
17 -c '$class_confidence'
18 -k '$k'
19 -d '$d'
20 -z '$z'
21 -l '$l'
22 ## Boolean flags stay unquoted so that an unchecked box expands to nothing instead of an empty-string argument.
23 #if str($advanced.option) == 'yes'
24 $advanced.correlation_transformation
25 $advanced.feature_selection
26 $advanced.normalization
27 --cmap_name '$advanced.cmap_name'
28 --min_threshold '$advanced.min_threshold'
29 --max_threshold '$advanced.max_threshold'
30 --random_state '$advanced.random_state'
31 #end if
32 --do_not_add_timestamp
33 --figure_size 15
34 -o ./
35 ]]></command>
36 <!-- Parameters shown in the tool form; the command template and the output filter refer to them by name. -->
37 <inputs>
38 <param argument="-i" type="data" format="tabular" label="Input Matrix"
39 help="A feature-observation matrix, with features as rows and observations as columns (e.g. Genes vs Cells)" />
40 <param argument="-t" type="data" format="tabular" label="Observation Classes"
41 help="A two-column file with observations in the first column, and an integer representing their assigned class in the second column (e.g. 'YFPCD24X3w_20 2')" />
42 <param argument="--class_confidence" type="float" value="1.0"
43 label="Confidence bias for clustering" />
44 <param argument="-k" type="integer" value="5" min="0"
45 label="Number of links towards closest neighbour with the same class" />
46 <param argument="-d" type="integer" value="1" min="0"
47 label="Number of links towards denser neighbours with a different class" />
48 <param argument="-z" type="integer" value="10" min="0"
49 label="Number of nearest neighbours to limit the horizon of where to search for denser neighbours of a different class" />
50 <param argument="-l" type="integer" value="0"
51 label="Number of mutual nearest neighbours that define outlier instances." />
52 <!-- The options below reach the command line only when "Use Advanced Parameters?" is set to Yes. -->
53 <conditional name="advanced" >
54 <param name="option" type="select" label="Use Advanced Parameters?">
55 <option value="yes" >Yes</option>
56 <option value="no" selected="true" >No</option>
57 </param>
58 <when value="no" ></when>
59 <when value="yes" >
60 <param argument="--correlation_transformation" type="boolean" optional="true"
61 falsevalue="" truevalue="--correlation_transformation"
62 label="Convert data matrix to corr coeff matrix" />
63 <param argument="--normalization" type="boolean" optional="true"
64 falsevalue="" truevalue="--normalization"
65 label="Convert data matrix to normalized matrix" />
66 <param argument="--feature_selection" type="boolean" optional="true"
67 falsevalue="" truevalue="--feature_selection"
68 label="Select most discriminative features" />
69 <param argument="--cmap_name" type="select"
70 label="Color map" >
71 <option value="cubehelix" />
72 <option value="terrain" />
73 <option value="nipy_spectral" />
74 <option value="gist_stern" />
75 <option value="gist_ncar" selected="true" />
76 </param>
77 <param argument="--min_threshold" type="integer" value="5"
78 label="Min num instances per class" />
79 <param argument="--max_threshold" type="integer" value="400"
80 label="Max num instances per class" />
81 <param argument="--random_state" type="integer" value="1"
82 label="Random seed" />
83 </when>
84 </conditional>
85 <param name="showlog" type="boolean" checked="true" label="Output Log file" />
86 </inputs>
87 <!-- Result files are picked up from the job working directory by file name; the log dataset is kept only when the showlog filter evaluates true. -->
88 <outputs>
89 <data name="out_coords" format="tabular" from_work_dir="2D_coords.txt"
90 label="${tool.name} on ${on_string}: 2D coords" />
91 <data name="out_target" format="tabular" from_work_dir="classes.txt"
92 label="${tool.name} on ${on_string}: Classes" />
93 <data name="out_clean" format="pdf" from_work_dir="img_1_clean.pdf"
94 label="${tool.name} on ${on_string}: Embed." />
95 <data name="out_links" format="pdf" from_work_dir="img_2_links.pdf"
96 label="${tool.name} on ${on_string}: Embed + 1st shift" />
97 <data name="out_hull" format="pdf" from_work_dir="img_4_hull.pdf"
98 label="${tool.name} on ${on_string}: Embed + hulls + edges" />
99 <data name="out_hulllink" format="pdf" from_work_dir="img_5_hull_link.pdf"
100 label="${tool.name} on ${on_string}: Embed + hulls + 1st shift" />
101 <data name="out_log" format="txt" from_work_dir="log"
102 label="${tool.name} on ${on_string}: Log" >
103 <filter>showlog</filter>
104 </data>
105 </outputs>
106 <!-- Test 1 runs with defaults (log kept, 7 outputs); test 2 enables the advanced options and drops the log (6 outputs). -->
107 <tests>
108 <test expect_num_outputs="7" >
109 <param name="i" value="prot_expression.mini.tsv" ftype="tabular" />
110 <param name="t" value="target.tsv" ftype="tabular" />
111 <output name="out_coords" >
112 <assert_contents>
113 <has_text text="-3.8328 2.0278" />
114 <has_text text="7.0832 -4.1420" />
115 </assert_contents>
116 </output>
117 <output name="out_target" >
118 <assert_contents>
119 <has_text text="14" />
120 <has_text text="8" />
121 </assert_contents>
122 </output>
123 </test>
124 <test expect_num_outputs="6" >
125 <param name="i" value="prot_expression.mini.tsv" ftype="tabular" />
126 <param name="t" value="target.tsv" ftype="tabular" />
127 <param name="class_confidence" value="1.5" />
128 <param name="k" value="5" />
129 <param name="z" value="10" />
130 <param name="l" value="0" />
131 <conditional name="advanced" >
132 <param name="option" value="yes" />
133 <param name="correlation_transformation" value="true" />
134 <param name="feature_selection" value="true" />
135 <param name="normalization" value="true" />
136 <param name="random_state" value="2" />
137 </conditional>
138 <param name="showlog" value="false" />
139 <output name="out_target" >
140 <assert_contents>
141 <has_text text="14" />
142 <has_text text="16" />
143 </assert_contents>
144 </output>
145 <output name="out_coords" >
146 <assert_contents>
147 <has_text text="25.9260 0.0171" />
148 <has_text text="-6.4521 -24.8940" />
149 </assert_contents>
150 </output>
151 </test>
152 </tests>
153 <help>
154 ============
155 GraphEmbed
156 ============
157 *Compute a 2D embedding of a data matrix given supervised class information.*
158
159 Input: A discrete label for each instance is expected.
160
161 A graph is built where nodes are instances and there exist two types of edges:
162
163 * 'knn' edges
164 An edge to the k-th nearest instance that has the same label.
165 * 'k_shift' edges
166 An edge to the k-th nearest instance that is denser and has a different label.
167
168 Density is defined as the sum of the pairwise cosine similarity between an instance and all the other instances. The desired edge length is the Euclidean distance between the instances. If the endpoints of an edge have the same label then the desired distance is divided by 1 + class_confidence. A k-shift edge is deleted if at least one of the endpoints is an outlier. Outlier nodes are defined as those instances that have no mutual k neighbors.
169
170 Finally the embedding is computed as the 2D coordinates of the corresponding graph embedding using the force layout algorithm from Tomihisa Kamada, and Satoru Kawai. "An algorithm for drawing general undirected graphs.", Information processing letters 31, no. 1 (1989): 7-15. </help>
171 <citations> <!-- Single DOI citation; presumably the Zenodo record for GraphEmbed (TODO confirm). -->
172 <citation type="doi">10.5281/zenodo.825832</citation>
173 </citations>
174 </tool>