annotate tools/plotting/venn_list.py @ 2:ef1506a2be6a draft

Uploaded v0.0.5 avoiding weird folder names from Galaxy Tool Shed download
author peterjc
date Thu, 16 May 2013 12:38:31 -0400
parents aefc86eda5f6
children 29fcd027d67a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
1 #!/usr/bin/env python
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
2 """Plot up to 3-way Venn Diagram using R limma vennDiagram (via rpy)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
3
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
4 This script is copyright 2010 by Peter Cock, The James Hutton Institute
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
5 (formerly SCRI), UK. All rights reserved.
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
6 See accompanying text file for licence details (MIT/BSD style).
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
7
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
8 This is version 0.0.3 of the script.
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
9 """
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
10
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
11
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
12 import sys
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
13 import rpy
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
14
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
15 def stop_err(msg, error_level=1):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
16 """Print error message to stdout and quit with given error level."""
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
17 sys.stderr.write("%s\n" % msg)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
18 sys.exit(error_level)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
19
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
20 try:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
21 import rpy
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
22 except ImportError:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
23 stop_err("Requires the Python library rpy (to call R)")
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
24
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
25 try:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
26 rpy.r.library("limma")
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
27 except:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
28 stop_err("Requires the R library limma (for vennDiagram function)")
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
29
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
30
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
31 if len(sys.argv)-1 not in [7, 10, 13]:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
32 stop_err("Expected 7, 10 or 13 arguments (for 1, 2 or 3 sets), not %i" % (len(sys.argv)-1))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
33
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
34 all_file, all_type, all_label = sys.argv[1:4]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
35 set_data = []
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
36 if len(sys.argv)-1 >= 7:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
37 set_data.append(tuple(sys.argv[4:7]))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
38 if len(sys.argv)-1 >= 10:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
39 set_data.append(tuple(sys.argv[7:10]))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
40 if len(sys.argv)-1 >= 13:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
41 set_data.append(tuple(sys.argv[10:13]))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
42 pdf_file = sys.argv[-1]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
43 n = len(set_data)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
44 print "Doing %i-way Venn Diagram" % n
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
45
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
46 def load_ids(filename, filetype):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
47 if filetype=="tabular":
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
48 for line in open(filename):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
49 if not line.startswith("#"):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
50 yield line.rstrip("\n").split("\t",1)[0]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
51 elif filetype=="fasta":
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
52 for line in open(filename):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
53 if line.startswith(">"):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
54 yield line[1:].rstrip("\n").split(None,1)[0]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
55 elif filetype.startswith("fastq"):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
56 #Use the Galaxy library not Biopython to cope with CS
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
57 from galaxy_utils.sequence.fastq import fastqReader
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
58 handle = open(filename, "rU")
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
59 for record in fastqReader(handle):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
60 #The [1:] is because the fastaReader leaves the @ on the identifer.
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
61 yield record.identifier.split()[0][1:]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
62 handle.close()
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
63 elif filetype=="sff":
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
64 try:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
65 from Bio.SeqIO import index
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
66 except ImportError:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
67 stop_err("Require Biopython 1.54 or later (to read SFF files)")
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
68 #This will read the SFF index block if present (very fast)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
69 for name in index(filename, "sff"):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
70 yield name
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
71 else:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
72 stop_err("Unexpected file type %s" % filetype)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
73
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
74 def load_ids_whitelist(filename, filetype, whitelist):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
75 for name in load_ids(filename, filetype):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
76 if name in whitelist:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
77 yield name
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
78 else:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
79 stop_err("Unexpected ID %s in %s file %s" % (name, filetype, filename))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
80
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
81 if all_file in ["", "-", '""', '"-"']:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
82 #Load without white list
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
83 sets = [set(load_ids(f,t)) for (f,t,c) in set_data]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
84 #Take union
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
85 all = set()
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
86 for s in sets:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
87 all.update(s)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
88 print "Inferred total of %i IDs" % len(all)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
89 else:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
90 all = set(load_ids(all_file, all_type))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
91 print "Total of %i IDs" % len(all)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
92 sets = [set(load_ids_whitelist(f,t,all)) for (f,t,c) in set_data]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
93
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
94 for s, (f,t,c) in zip(sets, set_data):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
95 print "%i in %s" % (len(s), c)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
96
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
97 #Now call R library to draw simple Venn diagram
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
98 try:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
99 #Create dummy Venn diagram counts object for three groups
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
100 cols = 'c("%s")' % '","'.join("Set%i" % (i+1) for i in range(n))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
101 rpy.r('groups <- cbind(%s)' % ','.join(['1']*n))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
102 rpy.r('colnames(groups) <- %s' % cols)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
103 rpy.r('vc <- vennCounts(groups)')
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
104 #Populate the 2^n classes with real counts
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
105 #Don't make any assumptions about the class order
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
106 #print rpy.r('vc')
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
107 for index, row in enumerate(rpy.r('vc[,%s]' % cols)):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
108 if isinstance(row, int) or isinstance(row, float):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
109 #Hack for rpy being too clever for single element row
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
110 row = [row]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
111 names = all
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
112 for wanted, s in zip(row, sets):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
113 if wanted:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
114 names = names.intersection(s)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
115 else:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
116 names = names.difference(s)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
117 rpy.r('vc[%i,"Counts"] <- %i' % (index+1, len(names)))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
118 #print rpy.r('vc')
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
119 if n == 1:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
120 #Single circle, don't need to add (Total XXX) line
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
121 names = [c for (t,f,c) in set_data]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
122 else:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
123 names = ["%s\n(Total %i)" % (c, len(s)) for s, (f,t,c) in zip(sets, set_data)]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
124 rpy.r.assign("names", names)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
125 rpy.r.assign("colors", ["red","green","blue"][:n])
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
126 rpy.r.pdf(pdf_file, 8, 8)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
127 rpy.r("""vennDiagram(vc, include="both", names=names,
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
128 main="%s", sub="(Total %i)",
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
129 circle.col=colors)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
130 """ % (all_label, len(all)))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
131 rpy.r.dev_off()
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
132 except Exception, exc:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
133 stop_err( "%s" %str( exc ) )
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
134 rpy.r.quit( save="no" )
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
135 print "Done"