comparison venn_diagram.py @ 7:087011d9e3aa draft

"planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
author proteore
date Tue, 11 May 2021 14:59:54 +0000
parents 95bbe3a6b09f
children 415ec6611b1d
comparison
equal deleted inserted replaced
6:95bbe3a6b09f 7:087011d9e3aa
1 #!/usr/bin/env python2.7 1 #!/usr/bin/env python
2 2
3 import argparse
4 import csv
5 import json
3 import os 6 import os
4 import sys 7 import sys # noqa 401
5 import json 8 import operator # noqa 401
6 import operator 9 import re
7 import argparse
8 import re, csv
9 from itertools import combinations 10 from itertools import combinations
10 11
11 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) 12 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
12 13
13 ################################################################################################################################################## 14 ########################################################################
14 # FUNCTIONS 15 # FUNCTIONS
15 ################################################################################################################################################## 16 ########################################################################
16 17
18
17 def isnumber(format, n): 19 def isnumber(format, n):
18 """ 20 """
19 Check if an element is integer or float 21 Check if an element is integer or float
20 """ 22 """
21 float_format = re.compile(r"^[-]?[1-9][0-9]*.?[0-9]+$") 23 float_format = re.compile(r"^[-]?[1-9][0-9]*.?[0-9]+$")
27 test = re.match(float_format, n) 29 test = re.match(float_format, n)
28 if test: 30 if test:
29 return True 31 return True
30 else: 32 else:
31 return False 33 return False
32 34
35
def input_to_dict(inputs):
    """
    Parse input and return a dictionary of name and data of each lists/files

    Each entry of ``inputs`` is a sequence ``[source, name, type]``; when
    ``type`` is ``"file"`` two more items follow: ``[..., header, ncol]``.

    * source: path of a tab-separated file when ``type`` is ``"file"``,
      otherwise the identifiers themselves as whitespace-separated text
    * name: display name of the list
    * header: the string ``"true"`` when the first row has to be skipped
    * ncol: column designation such as ``"c1"`` (1-based)

    A cell (or token) may hold several identifiers separated by ``;``.
    Identifiers are stripped; empty ones and the placeholder ``NA`` are
    dropped. Rows too short to contain the requested column are ignored.

    Returns a tuple ``(comp_dict, title_dict)``: both are keyed by the
    letters "A".."F" in input order; ``comp_dict`` maps to the set of
    identifiers and ``title_dict`` to the display name.

    Raises ValueError when ``ncol`` is rejected by ``isnumber`` and
    IndexError when more than six inputs are given.
    """
    labels = ["A", "B", "C", "D", "E", "F"]
    comp_dict = {}
    title_dict = {}
    for position, entry in enumerate(inputs):
        source = entry[0]
        title = labels[position]
        title_dict[title] = entry[1]

        if entry[2] == "file":
            header = entry[3]
            ncol = entry[4]
            with open(source, "r") as handle:
                rows = list(csv.reader(handle, delimiter="\t"))

            # Check if column number is in right form
            column = ncol.replace("c", "")
            if not isnumber("int", column):
                raise ValueError("Please fill in the right format of column number")  # noqa 501
            index = int(column) - 1  # computed once, not once per row

            if header == "true":
                rows = rows[1:]
            # gets ids from defined column; blank or truncated rows have
            # no such column and are skipped instead of raising IndexError
            cells = [row[index].split(";") for row in rows
                     if len(row) > index]
        else:
            cells = [token.split(";") for token in source.split()]

        # flatten, stripping *before* testing for emptiness so that
        # whitespace-only items do not end up in the set as ''
        ids = {item.strip() for cell in cells for item in cell}
        ids.discard('')
        ids.discard('NA')
        comp_dict[title] = ids

    return comp_dict, title_dict
73 81
82
74 def intersect(comp_dict): 83 def intersect(comp_dict):
75 """ 84 """
76 Calculate the intersections of input 85 Calculate the intersections of input
77 """ 86 """
78 names = set(comp_dict) 87 names = set(comp_dict)
81 others = set() 90 others = set()
82 [others.add(name) for name in names if name not in group] 91 [others.add(name) for name in names if name not in group]
83 difference = [] 92 difference = []
84 intersected = set.intersection(*(comp_dict[k] for k in group)) 93 intersected = set.intersection(*(comp_dict[k] for k in group))
85 if len(others) > 0: 94 if len(others) > 0:
86 difference = intersected.difference(set.union(*(comp_dict[k] for k in others))) 95 difference = intersected.difference(set.union(*(comp_dict[k] for k in others))) # noqa 501
87 yield group, list(intersected), list(difference) 96 yield group, list(intersected), list(difference)
97
88 98
def diagram(comp_dict, title_dict):
    """
    Create json string for jvenn diagram plot

    comp_dict maps each list key to its set of identifiers and
    title_dict maps the same keys to display names.

    Returns a dictionary with three entries:

    * "name": display name of every key of comp_dict
    * "data": for each group yielded by ``intersect``, keyed by the
      joined group members, the identifiers of that region
    * "values": the size of each region

    The region of the group containing every list is the plain
    intersection; any smaller group gets the identifiers exclusive to it
    (the ``difference`` yielded by ``intersect``).
    """
    result = {}
    result["name"] = {key: title_dict[key] for key in comp_dict}
    result["data"] = {}
    result["values"] = {}

    total = len(comp_dict)
    for group, intersected, difference in intersect(comp_dict):
        size = len(group)
        # The "all lists" case is tested first: with a single input list
        # the group has size 1 *and* covers every list, and its
        # difference is empty because there is nothing to subtract.
        if size == total:
            members = intersected
        elif 1 <= size < total:
            members = difference
        else:
            continue
        region = "".join(group)
        result["data"][region] = members
        result["values"][region] = len(members)

    return result
112 122
113 #Write intersections of input to text output file 123 # Write intersections of input to text output file
124
125
def write_text_venn(json_result):
    """
    Write the non-empty regions of the diagram to a tabular file

    json_result is a dictionary with a "data" entry (region key -> list
    of identifiers) and a "name" entry (list key -> display name).

    One column is written per non-empty region to
    "venn_diagram_text_output.tsv" in the current directory. The first
    row holds the column names: the display name when the region key is
    itself a list key, otherwise the display names of each character of
    the key joined with "_". Shorter columns are padded with empty
    cells. When no region has content the file is created empty.

    json_result is left untouched.
    """
    # Build fresh lists: the caller's lists must not be altered by the
    # 'NA' clean-up nor by the padding added below.
    result = dict(
        (key, [item for item in members if item != 'NA'])
        for key, members in json_result["data"].items() if members != []
    )
    list_names = dict(
        (key, name) for key, name in json_result["name"].items()
        if name != []
    )
    # max() of an empty sequence raises, so guard the "nothing to
    # write" case explicitly
    nb_lines_max = max(len(v) for v in result.values()) if result else 0

    lines = []
    for key, members in result.items():
        # get list names associated to each column
        if key in list_names:
            header = list_names[key]
        else:
            header = "_".join([list_names[k] for k in key])
        padding = [''] * (nb_lines_max - len(members))
        lines.append([header] + members + padding)

    # newline='' lets the csv module control line endings itself
    with open("venn_diagram_text_output.tsv", "w", newline="") as output:
        tsv_output = csv.writer(output, delimiter='\t')
        # transpose: one list per column -> one tuple per row
        tsv_output.writerows(zip(*lines))
144 158
159
def write_summary(summary_file, inputs):
    """
    Paste json string into template file

    Builds the diagram data from ``inputs`` (see input_to_dict), writes
    the tabular text output through write_text_venn, then copies
    "jvenn_template.html" (read from the directory of this script) to
    ``summary_file``, replacing the marker ###JVENN_DATA### with the
    JSON description of the diagram.
    """
    a, b = input_to_dict(inputs)
    data = diagram(a, b)

    to_replace = {
        "series": [data],
        "displayStat": "true",
        "displaySwitch": "true",
        "shortNumber": "true",
    }
    # Serialise first: write_text_venn receives the very same list
    # objects, so the page content must not depend on whatever it does
    # to them. This also avoids re-dumping for every matching line.
    json_data = json.dumps(to_replace)

    write_text_venn(data)

    template = os.path.join(CURRENT_DIR, "jvenn_template.html")
    # Context managers close both files even if copying fails half-way
    with open(template) as FH_summary_tpl:
        with open(summary_file, "w") as FH_summary_out:
            for line in FH_summary_tpl:
                if "###JVENN_DATA###" in line:
                    line = line.replace("###JVENN_DATA###", json_data)
                FH_summary_out.write(line)
169 184
185
def process(args):
    """
    Run the comparison described by the parsed command line

    ``args.input`` is handed to write_summary as the list descriptions
    and ``args.summary`` as the path of the HTML file to produce.
    """
    write_summary(summary_file=args.summary, inputs=args.input)
172 188
173 189
174 ################################################################################################################################################## 190 #####################################################################
175 # MAIN 191 # MAIN
176 ################################################################################################################################################## 192 #####################################################################
if __name__ == '__main__':
    # Parse parameters
    parser = argparse.ArgumentParser(
        description='Compare up to six lists of identifiers and draw '
                    'their Venn diagram with jvenn')
    group_input = parser.add_argument_group('Inputs')
    # Every --input occurrence appends one list description; the values
    # are read positionally by input_to_dict.
    group_input.add_argument('--input', nargs="+", action="append",
                             required=True,
                             help="One list to compare, given as: "
                                  "<file path or identifiers> <name> "
                                  "<type> [<header> <column>]. The last "
                                  "two values are read when <type> is "
                                  "'file'. Repeat --input for each list.")
    group_output = parser.add_argument_group('Outputs')
    group_output.add_argument('--summary', default="summary.html",
                              help="The HTML file containing the graphs. "
                                   "[Default: %(default)s]")
    args = parser.parse_args()

    # Process
    process(args)