0
|
1 <tool name="Venn Diagram" id="ceas_venn" version="0.1.0">
|
|
2 <description>Given 2 or 3 intervals, generate a venn diagram of their intersections</description>
|
|
3 <macros>
|
|
4 <import>corr_macros.xml</import>
|
|
5 </macros>
|
|
6 <expand macro="requirements" />
|
|
<!--
  Command template (Cheetah): runs venn_diagram.py on the selected BED
  datasets and captures both stdout and stderr in the "log" output.
  The body is wrapped in CDATA so that shell redirection characters
  (&, >) do not have to be entity-escaped to keep the file well-formed.
-->
<command><![CDATA[
## The third BED file is optional; Galaxy renders an unset optional
## dataset as the string "None", so test for that before touching it.
#set $has_third = str($bfile3) != 'None'

## Any label left blank falls back to the name of its dataset.
#if not bool(str($bf1label))
    #set $bf1label = $bfile1.name
#end if
#if not bool(str($bf2label))
    #set $bf2label = $bfile2.name
#end if
#if $has_third and not bool(str($bf3label))
    #set $bf3label = $bfile3.name
#end if

## NOTE(review): the "porportional" boolean declared in <inputs>
## (truevalue "" / falsevalue "-n") is never passed to the script here.
## Confirm that venn_diagram.py accepts -n before wiring it in.
venn_diagram.py $bfile1 $bfile2
#if $has_third
    $bfile3
#end if
-H $height -W $width -t "$title" -l "$bf1label" -l "$bf2label"
#if $has_third
    -l "$bf3label"
#end if
> $log 2>&1
]]></command>
|
|
<!--
  Tool form. Two BED datasets are required; the third dataset and all
  three labels are optional. Height and width are restricted to
  500..5000 by in_range validators.
  NOTE(review): the 100,000-line limit mentioned in the labels is only
  checked by the "shscript" configfile, which the <command> above does
  not invoke. Confirm whether the limit is still meant to be enforced.
-->
<inputs>
    <param ftype="interval" format="bed" name="bfile1" type="data" label="BED file 1(total number of lines in the 3 files should not exceed 100,000)"/>
    <param name="bf1label" type="text" label="Bed file 1 label:" optional="true"/>
    <param ftype="interval" format="bed" name="bfile2" type="data" label="BED file 2(total number of lines in the 3 files should not exceed 100,000)"/>
    <param name="bf2label" type="text" label="Bed file 2 label:" optional="true"/>
    <param ftype="interval" format="bed" name="bfile3" type="data" label="BED file 3(total number of lines in the 3 files should not exceed 100,000)" optional="true"/>
    <param name="bf3label" type="text" label="Bed file 3 label:" optional="true"/>
    <param name="height" type="integer" label="Pic Height" value="500">
        <validator type="in_range" max="5000" min="500" message="Pic Height is out of range, Pic Height has to be between 500 to 5000" />
    </param>
    <param name="width" type="integer" label="Pic Width" value="500">
        <validator type="in_range" max="5000" min="500" message="Pic Width is out of range, Pic Width has to be between 500 to 5000" />
    </param>
    <param name="title" type="text" label="Diagram title" value="Bed Venn Diagram"/>
    <!--
      The parameter name keeps its historical spelling ("porportional")
      because the tests and any saved workflows refer to it by that name;
      only the user-visible label is corrected.
    -->
    <param name="porportional" type="boolean" label="Draw Proportional Diagrams" checked="true"
           truevalue="" falsevalue="-n" />
</inputs>
|
|
<!--
  Two datasets are produced per run:
    output : the PNG picked up from venn_diagram.png in the job working directory
    log    : the text file that the command redirects the script's output into
-->
<outputs>
    <data name="output" format="png" from_work_dir="venn_diagram.png"/>
    <data name="log" format="txt" label="log file"/>
</outputs>
|
|
40 <expand macro="stdio"/>
|
|
<!--
  "shscript" is a bash wrapper, rendered through Cheetah, that validates the
  inputs (total line count, label and title lengths) before calling
  venn_diagram.py. The <command> in this file does not reference $shscript,
  so the script is generated but not executed unless the command is changed.
  ${dollar}, ${gt} and ${ad} are placeholders for the literal characters
  "$", ">" and "&"; presumably they are defined by the $script_chars
  include (verify against that file).
-->
<configfiles>
    <configfile name="shscript">
#!/bin/bash
#import os

#include source=$script_chars#

#set $path = os.path.abspath($__app__.config.tool_path)

##Note: the third bed file is optional, if it is not specified, we duplicate
##the second file as the third argument
THIRD="$bfile3"
if [ "$bfile3" = "None" ]; then ##duplicate the second arg
    THIRD="$bfile2"
    lines=`wc -l $bfile1 $bfile2 | tail -1 | awk '{print ${dollar}1}'`
else
    lines=`wc -l $bfile1 $bfile2 "${dollar}THIRD" | tail -1 | awk '{print ${dollar}1}'`
fi

##check the format of the 3 bed files
##(validation is currently disabled: every file is reported as 'passed')
format1='passed' ##`$path/validation/fcfunc.py $bfile1`
format2='passed' ##`$path/validation/fcfunc.py $bfile2`
format3='passed' ##`$path/validation/fcfunc.py ${dollar}THIRD`

##blank labels fall back to the dataset name
#if not bool(str($bf1label))
    #set $bf1label=$bfile1.name
#end if
#if not bool(str($bf2label))
    #set $bf2label=$bfile2.name
#end if
#if not bool(str($bf3label))
    ##without a third dataset there is no $bfile3.name to read; the second
    ##file stands in for the third, so its name is used for the label too
    #if str($bfile3) == 'None'
        #set $bf3label=$bfile2.name
    #else
        #set $bf3label=$bfile3.name
    #end if
#end if

#set $tlablelen1=len(str($bf1label))
#set $tlablelen2=len(str($bf2label))
#set $tlablelen3=len(str($bf3label))
#set $tlablelen4=len(str($title))

##every failure branch reports on stderr and exits non-zero so that the
##job is not recorded as successful
if [[ ${dollar}lines -gt 100000 ]];then
    echo "Total lines of the files exceed the limit of 100000 lines!" ${gt}${ad}2;
    exit 1;
elif [[ ${dollar}format1 != "passed" ]];then
    echo "BED file 1: ${dollar}format1" ${gt}${ad}2;
    exit 1;
elif [[ ${dollar}format2 != "passed" ]];then
    echo "BED file 2: ${dollar}format2" ${gt}${ad}2;
    exit 1;
elif [[ ${dollar}format3 != "passed" ]];then
    echo "BED file 3: ${dollar}format3" ${gt}${ad}2;
    exit 1;
elif [[ $tlablelen1 -gt 255 ]];then
    echo "Bed file 1 label is too long! 255 characters is at most!" ${gt}${ad}2
    exit 1;
elif [[ $tlablelen2 -gt 255 ]];then
    echo "Bed file 2 label is too long! 255 characters is at most!" ${gt}${ad}2
    exit 1;
elif [[ $tlablelen3 -gt 255 ]];then
    echo "Bed file 3 label is too long! 255 characters is at most!" ${gt}${ad}2
    exit 1;
elif [[ $tlablelen4 -gt 255 ]];then
    echo "Diagram title is too long! 255 characters is at most!" ${gt}${ad}2
    exit 1;
else
    ##pass THIRD (not $bfile3) so that the fallback chosen above is honoured
    venn_diagram.py $bfile1 $bfile2 "${dollar}THIRD" -H $height -W $width -t "$title" -l "$bf1label" -l "$bf2label" -l "$bf3label" ${gt}${ad} $log;
    cp venn_diagram.png $output;
fi
    </configfile>
</configfiles>
|
|
<!--
  Functional tests: the same three BED files rendered at the smallest
  (500 x 500) and largest (5000 x 5000) image sizes the validators allow.
  Each test compares both declared outputs: the PNG ("output") and the
  job log ("log").
-->
<tests>
    <test maxseconds="3600" name="VennDiagram_1">
        <param name="bfile1" value="bedfile1.bed" />
        <param name="bf1label" value="BedFile1" />
        <param name="bfile2" value="bedfile2.bed" />
        <param name="bf2label" value="BedFile2" />
        <param name="bfile3" value="bedfile3.bed" />
        <param name="bf3label" value="BedFile3" />
        <param name="height" value="500" />
        <param name="width" value="500" />
        <param name="title" value="BedVennDiagram" />
        <param name="porportional" value="" />
        <output name="output" file="venndiagram_1/venndiagram_1.png" lines_diff="40" />
        <!-- the .log reference file belongs to the "log" output, not "output" -->
        <output name="log" file="venndiagram_1/venndiagram_1.log" lines_diff="200" />
    </test>
    <test maxseconds="3600" name="VennDiagram_2">
        <param name="bfile1" value="bedfile1.bed" />
        <param name="bf1label" value="BedFile1" />
        <param name="bfile2" value="bedfile2.bed" />
        <param name="bf2label" value="BedFile2" />
        <param name="bfile3" value="bedfile3.bed" />
        <param name="bf3label" value="BedFile3" />
        <param name="height" value="5000" />
        <param name="width" value="5000" />
        <param name="title" value="BedVennDiagram" />
        <param name="porportional" value="" />
        <output name="output" file="venndiagram_2/venndiagram_2.png" lines_diff="40" />
        <!-- the .log reference file belongs to the "log" output, not "output" -->
        <output name="log" file="venndiagram_2/venndiagram_2.log" lines_diff="200" />
    </test>
</tests>
|
|
<help>
This tool generates a Venn diagram of the intersections of multiple interval
files. The original code was written by Jacqueline Wentz and revised by
Tao Liu. It calculates how many regions overlap between the BED
files and uses the Google Chart API to draw the final figure.

.. class:: warningmark

**CAUTION:** When three data sets are used and their sizes differ a
lot, the figure may not be correct, but the numbers are correct.

**CAUTION:** The total number of lines in all the input files should
not exceed 100,000.

.. class:: warningmark

**NEED IMPROVEMENT**

-----

**Parameters**

- **BED file 1 and 2** are the two BED files used to calculate
  the overlap.
- **BED file 3** is the third BED file to be used. It is optional.
- **BED file labels** are the names of the datasets displayed on the
  diagram; leave them blank to use the default names.
- **Pic Height** is the height of the final image.
- **Pic Width** is the width of the final image.
- **Diagram title** is the title of the final image.

-----

**Outputs**

- **PNG file** is the Venn diagram plot. The numbers of overlapping
  regions are included in the figure.
- **LOG file** is the job log. If you see errors, please attach this to
  the bug report.

-----

**Summary**

The regions in the BED files are clustered first.
For example, suppose we have 5 regions.

bed A::

    (200, 900)
    (1000, 1200)

bed B::

    (100, 300)
    (700, 1100)
    (1400, 1500)

Since they overlap, they are clustered into 2 regions,
(100, 1200) and (1400, 1500), and each of the 2 regions is then
checked for whether it is included in bed A or bed B. So::

    (100, 1200) is included in bed A, B

    (1400, 1500) is included in bed B

Then the Venn diagram is drawn.
</help>
|
|
206
|
|
207 </tool>
|