comparison venn_diagram.py @ 0:8d61f5ca50a4 draft

planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
author proteore
date Mon, 12 Nov 2018 11:26:40 -0500
parents
children b17f0fbbd6c9
comparison
equal deleted inserted replaced
-1:000000000000 0:8d61f5ca50a4
1 #!/usr/bin/env python2.7
2
3 import os
4 import sys
5 import json
6 import operator
7 import argparse
8 import re
9 from itertools import combinations
10
11 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
12
13 ##################################################################################################################################################
14 # FUNCTIONS
15 ##################################################################################################################################################
16
# Compiled once at import time instead of on every call.
# The integer pattern is unchanged: optional sign, no leading zero, so "0"
# and "01" are rejected.
_INT_PATTERN = re.compile(r"^-?[1-9][0-9]*$")
# The decimal point is escaped so that it no longer matches an arbitrary
# character (the previous pattern accepted strings such as "1a5").
# Single digits ("5") and values below one ("0.5") are accepted as well.
_FLOAT_PATTERN = re.compile(r"^-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?$")


def isnumber(format, n):
    """
    Check if an element is integer or float

    Parameters:
        format: "int" or "float", the kind of number to test for.
        n: the string to test.

    Returns:
        True when `n` matches the requested format, False otherwise.
        Any other value of `format` always gives False.
    """
    if format == "int":
        pattern = _INT_PATTERN
    elif format == "float":
        pattern = _FLOAT_PATTERN
    else:
        return False
    return pattern.match(n) is not None
32
def input_to_dict(inputs):
    """
    Parse input and return a dictionary of name and data of each lists/files

    Parameters:
        inputs: a sequence of input descriptions. Each description is a
            sequence holding, in order:
              [0] a file path, or the IDs themselves separated by whitespace
              [1] the display name of the list
              [2] the input type; "file" means [0] is a path, any other
                  value means [0] is the ID text
              [3] (file only) "true" when the first line is a header to skip
              [4] (file only) the column to read, e.g. "c1" (1-based)

    Returns:
        (comp_dict, title_dict): comp_dict maps the letters "A".."F" to the
        set of IDs of each input, title_dict maps the same letters to the
        display names.

    Raises:
        ValueError: when more than six inputs are given, or when the column
            description of a file input is not a valid integer.
    """
    labels = ["A", "B", "C", "D", "E", "F"]
    if len(inputs) > len(labels):
        # Fail with an explicit message instead of an IndexError on the
        # label lookup.
        raise ValueError(
            "At most %d lists can be compared, got %d"
            % (len(labels), len(inputs))
        )

    comp_dict = {}
    title_dict = {}
    for title, entry in zip(labels, inputs):
        source = entry[0]
        title_dict[title] = entry[1]

        if entry[2] == "file":
            header = entry[3]
            column = entry[4].replace("c", "")

            # Check if column number is in right form
            if not isnumber("int", column):
                raise ValueError("Please fill in the right format of column number")
            # Parsed once here rather than once per line of the file.
            col_index = int(column) - 1

            # The context manager closes the file even if reading fails.
            with open(source, "r") as handle:
                lines = handle.readlines()
            if header == "true":
                lines = lines[1:]

            # A cell may hold several IDs separated by ";": take only the first
            ids = [
                line.split("\t")[col_index].split(";")[0].strip()
                for line in lines
            ]
        else:
            ids = source.split()

        comp_dict[title] = set(ids)

    return comp_dict, title_dict
68
def intersect(comp_dict):
    """
    Calculate the intersections of input

    Parameters:
        comp_dict: dictionary mapping a list label to its set of IDs.

    Yields:
        One (group, intersected, difference) tuple for every non-empty
        combination of labels, smallest combinations first:
          group: tuple of the labels in the combination (sorted)
          intersected: sorted list of the IDs common to every set of the group
          difference: sorted list of the IDs of `intersected` found in none
              of the sets outside the group; an empty list when the group
              holds every label (there is no set left to subtract)

    The lists are sorted so that the output does not depend on set iteration
    order; IDs are therefore expected to be mutually comparable (they are
    strings when built by input_to_dict).
    """
    names = sorted(comp_dict)
    for size in range(1, len(names) + 1):
        for group in combinations(names, size):
            others = [name for name in names if name not in group]
            intersected = set.intersection(*(comp_dict[k] for k in group))
            if others:
                # Removing each outside set in turn is the same as removing
                # their union.
                exclusive = intersected.difference(*(comp_dict[k] for k in others))
                difference = sorted(exclusive)
            else:
                difference = []
            yield group, sorted(intersected), difference
83
def diagram(comp_dict, title_dict):
    """
    Create json string for jvenn diagram plot

    Parameters:
        comp_dict: dictionary mapping a list label to its set of IDs.
        title_dict: dictionary mapping the same labels to display names.

    Returns:
        A dictionary with three entries:
          "name": label -> display name
          "data": region key (the labels of the group joined together,
              e.g. "AB") -> list of IDs in that region
          "values": region key -> number of IDs in that region
    """
    result = {
        "name": dict((k, title_dict[k]) for k in comp_dict),
        "data": {},
        "values": {},
    }

    total = len(comp_dict)
    for group, intersected, difference in intersect(comp_dict):
        key = "".join(group)
        # The group holding every label is tested first: intersect() gives
        # it an empty difference, so its region is the intersection itself.
        # Testing it first also covers a single input list, whose only
        # region was previously reported as empty.
        members = intersected if len(group) == total else difference
        result["data"][key] = members
        result["values"][key] = len(members)

    return result
107
def write_text_venn(json_result):
    """
    Write intersections of input to text output file

    The file "venn_diagram_text_output.txt" is written in the current
    working directory as a tab-separated table with one column per
    non-empty region: a header line built from the display names of the
    region's lists joined by "_", then one ID per cell. Trailing whitespace
    is stripped from every line and no newline follows the last line.

    Parameters:
        json_result: dictionary with a "data" entry (region key -> list of
            IDs) and a "name" entry (label -> display name), as returned by
            diagram().
    """
    # items() instead of iteritems() so that this runs on Python 2 and 3.
    regions = dict((k, v) for k, v in json_result["data"].items() if v != [])
    # Fixed column order (smaller groups first, then alphabetical) instead
    # of whatever order the dictionary happens to iterate in.
    columns = sorted(regions, key=lambda key: (len(key), key))

    # The header is built once; each character of a region key is a label.
    header = "\t".join(
        "_".join(json_result["name"][label] for label in column)
        for column in columns
    )

    # Guard against max() of an empty sequence when every region is empty.
    row_count = max(len(regions[c]) for c in columns) if columns else 0
    rows = []
    for i in range(row_count):
        cells = [regions[c][i] if i < len(regions[c]) else "" for c in columns]
        # Strip last tab in the end of the lines
        rows.append("\t".join(cells).rstrip())

    with open("venn_diagram_text_output.txt", "w") as output:
        output.write(header.rstrip() + "\n" + "\n".join(rows))
138
def write_summary(summary_file, inputs):
    """
    Paste json string into template file

    Builds the diagram data from `inputs`, writes the text table through
    write_text_venn(), then copies "jvenn_template.html" (read from the
    directory of this script) to `summary_file`, replacing the
    "###JVENN_DATA###" marker with the JSON-encoded diagram settings.

    Parameters:
        summary_file: path of the HTML file to write.
        inputs: input descriptions, passed unchanged to input_to_dict().
    """
    comp_dict, title_dict = input_to_dict(inputs)
    data = diagram(comp_dict, title_dict)
    write_text_venn(data)

    to_replace = {
        "series": [data],
        "displayStat": "true",
        "displaySwitch": "true",
        "shortNumber": "true",
    }
    # Serialised once, not once per line that contains the marker.
    # NOTE(review): the JSON is pasted into the HTML unescaped; confirm that
    # the IDs and names cannot carry markup such as "</script>".
    payload = json.dumps(to_replace)

    template_path = os.path.join(CURRENT_DIR, "jvenn_template.html")
    # Context managers close both files even when reading or writing fails.
    with open(template_path) as template:
        with open(summary_file, "w") as summary:
            for line in template:
                if "###JVENN_DATA###" in line:
                    line = line.replace("###JVENN_DATA###", payload)
                summary.write(line)
163
def process(args):
    """
    Run the tool from parsed command-line arguments

    Parameters:
        args: object with a `summary` attribute (path of the HTML output)
            and an `input` attribute (the input descriptions); both are
            forwarded to write_summary().
    """
    summary_path = args.summary
    input_specs = args.input
    write_summary(summary_path, input_specs)
166
167
168 ##################################################################################################################################################
169 # MAIN
170 ##################################################################################################################################################
if __name__ == '__main__':
    # Parse parameters
    # The description and the --input help previously described another
    # tool ("Filters an abundance file"); they now describe what
    # input_to_dict() actually reads.
    parser = argparse.ArgumentParser(
        description='Compare up to six lists of IDs and build a jvenn diagram with a text summary'
    )
    group_input = parser.add_argument_group('Inputs')
    group_input.add_argument(
        '--input', nargs="+", action="append", required=True,
        help="One list to compare; repeat the option for each list (up to six). "
             "Values, in order: the file path or the IDs separated by whitespace, "
             "the list name, the type ('file' or anything else for pasted IDs) and, "
             "for a file, 'true' if it has a header line and the column to read (e.g. c1)."
    )
    group_output = parser.add_argument_group('Outputs')
    group_output.add_argument(
        '--summary', default="summary.html",
        help="The HTML file containing the graphs. [Default: %(default)s]"
    )
    args = parser.parse_args()

    # Process
    process(args)