annotate tools/plotting/venn_list.py @ 4:29fcd027d67a draft

Uploaded v0.0.6 take 1, simple test (which currently fails)
author peterjc
date Thu, 16 May 2013 12:43:57 -0400
parents aefc86eda5f6
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
1 #!/usr/bin/env python
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
2 """Plot up to 3-way Venn Diagram using R limma vennDiagram (via rpy)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
3
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
4 This script is copyright 2010 by Peter Cock, The James Hutton Institute
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
5 (formerly SCRI), UK. All rights reserved.
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
6 See accompanying text file for licence details (MIT/BSD style).
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
7
4
29fcd027d67a Uploaded v0.0.6 take 1, simple test (which currently fails)
peterjc
parents: 0
diff changeset
8 This is version 0.0.4 of the script.
0
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
9 """
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
10
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
11
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
12 import sys
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
13 import rpy
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
14
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
15 def stop_err(msg, error_level=1):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
16 """Print error message to stdout and quit with given error level."""
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
17 sys.stderr.write("%s\n" % msg)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
18 sys.exit(error_level)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
19
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
20 try:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
21 import rpy
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
22 except ImportError:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
23 stop_err("Requires the Python library rpy (to call R)")
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
24
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
25 try:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
26 rpy.r.library("limma")
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
27 except:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
28 stop_err("Requires the R library limma (for vennDiagram function)")
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
29
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
30
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
31 if len(sys.argv)-1 not in [7, 10, 13]:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
32 stop_err("Expected 7, 10 or 13 arguments (for 1, 2 or 3 sets), not %i" % (len(sys.argv)-1))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
33
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
34 all_file, all_type, all_label = sys.argv[1:4]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
35 set_data = []
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
36 if len(sys.argv)-1 >= 7:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
37 set_data.append(tuple(sys.argv[4:7]))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
38 if len(sys.argv)-1 >= 10:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
39 set_data.append(tuple(sys.argv[7:10]))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
40 if len(sys.argv)-1 >= 13:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
41 set_data.append(tuple(sys.argv[10:13]))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
42 pdf_file = sys.argv[-1]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
43 n = len(set_data)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
44 print "Doing %i-way Venn Diagram" % n
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
45
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
46 def load_ids(filename, filetype):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
47 if filetype=="tabular":
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
48 for line in open(filename):
4
29fcd027d67a Uploaded v0.0.6 take 1, simple test (which currently fails)
peterjc
parents: 0
diff changeset
49 line = line.rstrip("\n")
29fcd027d67a Uploaded v0.0.6 take 1, simple test (which currently fails)
peterjc
parents: 0
diff changeset
50 if line and not line.startswith("#"):
29fcd027d67a Uploaded v0.0.6 take 1, simple test (which currently fails)
peterjc
parents: 0
diff changeset
51 yield line.split("\t",1)[0]
0
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
52 elif filetype=="fasta":
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
53 for line in open(filename):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
54 if line.startswith(">"):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
55 yield line[1:].rstrip("\n").split(None,1)[0]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
56 elif filetype.startswith("fastq"):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
57 #Use the Galaxy library not Biopython to cope with CS
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
58 from galaxy_utils.sequence.fastq import fastqReader
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
59 handle = open(filename, "rU")
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
60 for record in fastqReader(handle):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
61 #The [1:] is because the fastaReader leaves the @ on the identifer.
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
62 yield record.identifier.split()[0][1:]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
63 handle.close()
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
64 elif filetype=="sff":
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
65 try:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
66 from Bio.SeqIO import index
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
67 except ImportError:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
68 stop_err("Require Biopython 1.54 or later (to read SFF files)")
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
69 #This will read the SFF index block if present (very fast)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
70 for name in index(filename, "sff"):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
71 yield name
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
72 else:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
73 stop_err("Unexpected file type %s" % filetype)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
74
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
75 def load_ids_whitelist(filename, filetype, whitelist):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
76 for name in load_ids(filename, filetype):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
77 if name in whitelist:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
78 yield name
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
79 else:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
80 stop_err("Unexpected ID %s in %s file %s" % (name, filetype, filename))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
81
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
82 if all_file in ["", "-", '""', '"-"']:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
83 #Load without white list
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
84 sets = [set(load_ids(f,t)) for (f,t,c) in set_data]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
85 #Take union
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
86 all = set()
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
87 for s in sets:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
88 all.update(s)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
89 print "Inferred total of %i IDs" % len(all)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
90 else:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
91 all = set(load_ids(all_file, all_type))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
92 print "Total of %i IDs" % len(all)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
93 sets = [set(load_ids_whitelist(f,t,all)) for (f,t,c) in set_data]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
94
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
95 for s, (f,t,c) in zip(sets, set_data):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
96 print "%i in %s" % (len(s), c)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
97
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
98 #Now call R library to draw simple Venn diagram
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
99 try:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
100 #Create dummy Venn diagram counts object for three groups
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
101 cols = 'c("%s")' % '","'.join("Set%i" % (i+1) for i in range(n))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
102 rpy.r('groups <- cbind(%s)' % ','.join(['1']*n))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
103 rpy.r('colnames(groups) <- %s' % cols)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
104 rpy.r('vc <- vennCounts(groups)')
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
105 #Populate the 2^n classes with real counts
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
106 #Don't make any assumptions about the class order
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
107 #print rpy.r('vc')
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
108 for index, row in enumerate(rpy.r('vc[,%s]' % cols)):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
109 if isinstance(row, int) or isinstance(row, float):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
110 #Hack for rpy being too clever for single element row
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
111 row = [row]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
112 names = all
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
113 for wanted, s in zip(row, sets):
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
114 if wanted:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
115 names = names.intersection(s)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
116 else:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
117 names = names.difference(s)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
118 rpy.r('vc[%i,"Counts"] <- %i' % (index+1, len(names)))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
119 #print rpy.r('vc')
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
120 if n == 1:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
121 #Single circle, don't need to add (Total XXX) line
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
122 names = [c for (t,f,c) in set_data]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
123 else:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
124 names = ["%s\n(Total %i)" % (c, len(s)) for s, (f,t,c) in zip(sets, set_data)]
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
125 rpy.r.assign("names", names)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
126 rpy.r.assign("colors", ["red","green","blue"][:n])
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
127 rpy.r.pdf(pdf_file, 8, 8)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
128 rpy.r("""vennDiagram(vc, include="both", names=names,
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
129 main="%s", sub="(Total %i)",
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
130 circle.col=colors)
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
131 """ % (all_label, len(all)))
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
132 rpy.r.dev_off()
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
133 except Exception, exc:
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
134 stop_err( "%s" %str( exc ) )
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
135 rpy.r.quit( save="no" )
aefc86eda5f6 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
136 print "Done"