1
|
1 <tool id="venn_list" name="Venn Diagram" version="0.0.3">
|
|
2 <description>from lists</description>
|
|
<!-- Command template for venn_list.py. Arguments are emitted in this order:
     (1) the full identifier dataset and its .ext value when the universe is
         "explicit", or two "-" placeholders when it is "implicit";
     (2) the quoted plot title (main_lab);
     (3) for every entry of the "sets" repeat: dataset, its .ext value, and
         the quoted caption;
     (4) the output dataset PDF.
     NOTE(review): .ext is presumably the Galaxy datatype extension, and how
     venn_list.py treats the "-" placeholders is not visible here; confirm
     both against the script. -->
3 <command interpreter="python">
|
|
4 venn_list.py
|
|
5 #if $universe.type_select=="implicit":
|
|
6 - -
|
|
7 #else:
|
|
8 $main $main.ext
|
|
9 #end if
|
|
10 "$main_lab"
|
|
11 #for $s in $sets:
|
|
12 $s.set $s.set.ext "$s.lab"
|
|
13 #end for
|
|
14 $PDF</command>
|
|
<!-- Tool inputs:
     * main_lab: free-text plot title (default "Venn Diagram").
     * universe: conditional on type_select; the "explicit" branch asks for
       the full identifier dataset (main), the "implicit" branch has no
       further parameters.
     * sets: repeat of one to three entries, each with a dataset (set) and
       a caption (lab, default "Group"). -->
15 <inputs>
|
|
16 <param name="main_lab" size="30" type="text" value="Venn Diagram" label="Plot title"/>
|
|
17 <conditional name="universe">
|
|
18 <param name="type_select" type="select" label="Implicit or explicit full ID list?">
|
|
19 <option value="explicit">Explicit</option>
|
|
20 <option value="implicit">Implicit (use union of sets below)</option>
|
|
21 </param>
|
|
22 <when value="explicit">
|
|
23 <param name="main" type="data" format="tabular,fasta,fastq,sff" label="Full dataset (with all identifiers)" help="Tabular file (uses column one), FASTA, FASTQ or SFF file."/>
|
|
24 </when>
|
|
25 <when value="implicit"/>
|
|
26 </conditional>
|
|
27 <repeat name="sets" min="1" max="3" title="Sets">
|
|
28 <param name="set" type="data" format="tabular,fasta,fastq,sff" label="Members of set" help="Tabular file (uses column one), FASTA, FASTQ or SFF file."/>
|
|
29 <param name="lab" size="30" type="text" value="Group" label="Caption for set"/>
|
|
30 </repeat>
|
|
31 </inputs>
|
|
<!-- Single output dataset in PDF format; its name "PDF" is what the command
     template refers to as $PDF (the last command-line argument). -->
32 <outputs>
|
|
33 <data format="pdf" name="PDF" />
|
|
34 </outputs>
|
|
<!-- Python modules the tool declares as dependencies. Per the help text,
     rpy is used to call the R library from Python and Bio (Biopython) is
     used to read SFF files. -->
35 <requirements>
|
|
36 <requirement type="python-module">rpy</requirement>
|
|
37 <requirement type="python-module">Bio</requirement>
|
|
38 </requirements>
|
|
<!-- Both test cases below are commented out, so this section currently
     defines no active tests. The reason each one is disabled is given at
     the top of its own comment. -->
39 <tests>
|
|
40 <!-- Doesn't seem to work properly, manages to get two sets, both
|
|
41 with same FASTA file, but second with default "Group" label.
|
|
42 <test>
|
|
43 <param name="type_select" value="explicit"/>
|
|
44 <param name="main" value="venn_list.tabular" ftype="tabular"/>
|
|
45 <param name="main_lab" value="Some Proteins"/>
|
|
46 <param name="set" value="rhodopsin_proteins.fasta"/>
|
|
47 <param name="lab" value="Rhodopsins"/>
|
|
48 <output name="PDF" file="venn_list1.pdf" ftype="pdf"/>
|
|
49 </test>
|
|
50 -->
|
|
51 <!-- Can't use more than one repeat value in tests (yet)
|
|
52 <test>
|
|
53 <param name="type_select" value="explicit"/>
|
|
54 <param name="main" value="venn_list.tabular" ftype="tabular"/>
|
|
55 <param name="main_lab" value="Some Proteins"/>
|
|
56 <param name="count" value="3"/>
|
|
57 <param name="set" value="rhodopsin_proteins.fasta"/>
|
|
58 <param name="lab" value="Rhodopsins"/>
|
|
59 <param name="set" value="four_human_proteins.fasta"/>
|
|
60 <param name="lab" value="Human"/>
|
|
61 <param name="set" value="blastp_four_human_vs_rhodopsin.tabular"/>
|
|
62 <param name="lab" value="Human vs Rhodopsin BLAST"/>
|
|
63 <output name="PDF" file="venn_list3.pdf" ftype="pdf"/>
|
|
64 </test>
|
|
65 -->
|
|
66 </tests>
|
|
67 <help>
|
|
68
|
|
69 .. class:: infomark
|
|
70
|
|
71 **TIP:** If your data is in tabular files, the identifier is assumed to be in column one.
|
|
72
|
|
73 **What it does**
|
|
74
|
|
75 Draws a Venn Diagram for one, two or three sets (as a PDF file).
|
|
76
|
|
77 You must supply one, two or three sets of identifiers -- corresponding
|
|
78 to one, two or three circles on the Venn Diagram.
|
|
79
|
|
80 In general you should also give the full list of all the identifiers
|
|
81 explicitly. This is used to calculate the number of identifiers outside
|
|
82 the circles (and check the identifiers in the other files match up).
|
|
83 The full list can be omitted by implicitly taking the union of the
|
|
84 category sets. In this case, the count outside the categories (circles)
|
|
85 will always be zero.
|
|
86
|
|
87 The identifiers can be taken from the first column of a tabular file
|
|
88 (e.g. query names in BLAST tabular output, or signal peptide predictions
|
|
89 after filtering, etc), or from a sequence file (FASTA, FASTQ, SFF).
|
|
90
|
|
91 For example, you may have a set of NGS reads (as a FASTA, FASTQ or SFF
|
|
92 file), and the results of several different read mappings (e.g. to
|
|
93 different references) as tabular files (filtered to have just the mapped
|
|
94 reads). You could then show the different mappings (and their overlaps)
|
|
95 as a Venn Diagram, and the outside count would be the unmapped reads.
|
|
96
|
|
97 **Citations**
|
|
98
|
|
99 The Venn Diagrams are drawn using Gordon Smyth's limma package from
|
|
100 R/Bioconductor, http://www.bioconductor.org/
|
|
101
|
|
102 The R library is called from Python via rpy, http://rpy.sourceforge.net/
|
|
103
|
|
104 This tool uses Biopython to read SFF files. If you use this tool with
|
|
105 SFF files in scientific work leading to a publication, please cite the
|
|
106 Biopython application note:
|
|
107
|
|
108 Cock et al 2009. Biopython: freely available Python tools for computational
|
|
109 molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
|
|
110 http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
|
|
111
|
|
112 </help>
|
|
113 </tool>
|