annotate venn_diagram.py @ 7:087011d9e3aa draft

"planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
author proteore
date Tue, 11 May 2021 14:59:54 +0000
parents 95bbe3a6b09f
children 415ec6611b1d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
1 #!/usr/bin/env python
0
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
2
7
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
3 import argparse
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
4 import csv
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
5 import json
0
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
6 import os
7
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
7 import sys # noqa 401
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
8 import operator # noqa 401
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
9 import re
0
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
10 from itertools import combinations
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
11
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
# Directory holding this script; write_summary() reads jvenn_template.html
# from it, so the template must be installed next to this file.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
13
7
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
14 ########################################################################
0
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
15 # FUNCTIONS
7
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
16 ########################################################################
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
17
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
18
0
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
def isnumber(format, n):
    """
    Check if a string is written as an integer or as a float

    ``format`` selects the check, "int" or "float"; any other value makes
    the function return False. ``n`` is the string to test.

    "int" accepts an optionally negative integer without leading zero.
    "0" itself is rejected: input_to_dict() validates 1-based column
    numbers with this check.

    "float" accepts an optionally negative decimal number, with or without
    a fractional part ("7", "0.5", "-12.25").
    """
    if format == "int":
        pattern = r"^-?[1-9][0-9]*$"
    elif format == "float":
        # The dot is escaped so that it only matches a decimal separator
        # ("1a5" is not a number) and the integer part may be a single
        # digit or zero ("7" and "0.5" are numbers).
        pattern = r"^-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?$"
    else:
        return False
    return re.match(pattern, n) is not None
7
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
34
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
35
0
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
def input_to_dict(inputs):
    """
    Parse input and return a dictionary of name and data of each lists/files

    ``inputs`` holds one entry per list to compare:
      - [path, name, "file", header, ncol] for a tab-separated file, where
        header is "true" when the first row has to be skipped and ncol is
        the 1-based column holding the ids ("c2" or "2");
      - [text, name, <any other type>] for ids given directly as a
        whitespace-separated string.
    A cell or a token may hold several ids separated by ";".

    Returns (comp_dict, title_dict), both keyed by a one-letter label
    ("A" to "F", in input order): comp_dict maps the label to the set of
    ids and title_dict maps it to the name of the list. Empty ids and
    "NA" are discarded.

    Raises ValueError when more lists are given than there are labels or
    when a column number is not a positive integer.
    """
    # One-letter labels: the region keys built from them ("AB", "ACD", ...)
    # are split back into single characters by write_text_venn()
    labels = ["A", "B", "C", "D", "E", "F"]
    if len(inputs) > len(labels):
        raise ValueError(
            "At most {} lists can be compared, {} were given".format(
                len(labels), len(inputs)))

    comp_dict = {}
    title_dict = {}
    for title, entry in zip(labels, inputs):
        source, name, input_type = entry[0], entry[1], entry[2]
        title_dict[title] = name

        if input_type == "file":
            header = entry[3]
            column = entry[4].replace("c", "")
            # Check if column number is in right form. A negative number
            # passes isnumber() but would silently count columns from the
            # end of each row, so it is rejected as well.
            if not isnumber("int", column) or int(column) < 1:
                raise ValueError("Please fill in the right format of column number")  # noqa 501
            index = int(column) - 1

            with open(source, "r") as handle:
                rows = list(csv.reader(handle, delimiter="\t"))
            if header == "true":
                rows = rows[1:]
            # gets ids from defined column; rows too short to hold it
            # (blank lines for instance) carry no id and are skipped
            cells = [row[index] for row in rows if len(row) > index]
        else:
            cells = source.split()

        ids = set()
        for cell in cells:
            for item in cell.split(";"):
                # strip before testing, so that blank items are dropped
                item = item.strip()
                if item:
                    ids.add(item)
        ids.discard("NA")
        comp_dict[title] = ids

    return comp_dict, title_dict
7
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
81
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
82
0
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
def intersect(comp_dict):
    """
    Calculate the intersections of input

    ``comp_dict`` maps a label to a set of ids. For every non-empty
    combination of labels (labels in sorted order, smallest combinations
    first) yield a tuple (group, intersected, difference) where:
      - group is the tuple of labels of the combination;
      - intersected lists the ids shared by every set of the group;
      - difference lists the ids of intersected found in no set outside
        the group; it is empty when the group holds every label.
    Both lists are sorted, so that the result does not depend on the
    iteration order of the sets.
    """
    names = sorted(comp_dict)
    for size in range(1, len(names) + 1):
        for group in combinations(names, size):
            intersected = set.intersection(*(comp_dict[k] for k in group))
            others = [name for name in names if name not in group]
            if others:
                outside = set.union(*(comp_dict[k] for k in others))
                difference = intersected - outside
            else:
                # set.union() cannot be called without any set
                difference = set()
            yield group, sorted(intersected), sorted(difference)
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
97
0
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
98
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
def diagram(comp_dict, title_dict):
    """
    Create the data of the jvenn diagram plot

    ``comp_dict`` maps a label to a set of ids and ``title_dict`` maps the
    same labels to the names of the lists.

    Returns a dictionary (serialised to JSON by write_summary()) with:
      - "name": label -> name of the list;
      - "data": region -> ids of the region, a region being the labels of
        a combination of lists joined together ("A", "AB", ...);
      - "values": region -> number of ids of the region.
    A region holds the ids shared by its lists and found in no other list.
    """
    result = {
        "name": {key: title_dict[key] for key in comp_dict},
        "data": {},
        "values": {},
    }
    nb_lists = len(comp_dict)
    for group, intersected, difference in intersect(comp_dict):
        # intersect() gives an empty difference to the group made of every
        # list, whose region is the whole intersection. This is tested
        # first so that it also holds when a single list is given.
        if len(group) == nb_lists:
            members = intersected
        else:
            members = difference
        region = "".join(group)
        result["data"][region] = members
        result["values"][region] = len(members)

    return result
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
122
7
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
123 # Write intersections of input to text output file
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
124
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
125
0
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
def write_text_venn(json_result):
    """
    Write intersections of input to text output file

    ``json_result`` is the dictionary built by diagram(): "data" maps a
    region ("A", "AB", ...) to its list of ids and "name" maps a one-letter
    label to the name of a list.

    Writes "venn_diagram_text_output.tsv" in the current directory: one
    column per non-empty region, headed by the names of its lists joined
    with "_", followed by the ids of the region. Shorter columns are padded
    with empty cells. The file is left empty when every region is empty.
    ``json_result`` is not modified.
    """
    names = json_result["name"]

    # Copy the non-empty regions without "NA": the lists of json_result are
    # dumped into the HTML report afterwards and must not receive the
    # padding added below
    columns = {}
    for key, ids in json_result["data"].items():
        if ids:
            columns[key] = [item for item in ids if item != "NA"]

    nb_lines_max = 0
    if columns:
        nb_lines_max = max(len(ids) for ids in columns.values())

    # construct tsv, one line per region for now
    lines = []
    for key, ids in columns.items():
        # get list names associated to each column: a region of several
        # lists is named after each letter of its key
        if key in names:
            header = names[key]
        else:
            header = "_".join(names[k] for k in key)
        padding = [""] * (nb_lines_max - len(ids))
        lines.append([header] + ids + padding)

    with open("venn_diagram_text_output.tsv", "w") as output:
        tsv_output = csv.writer(output, delimiter="\t")
        # transpose tsv: regions become columns
        tsv_output.writerows(zip(*lines))
0
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
158
7
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
159
0
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
def write_summary(summary_file, inputs):
    """
    Paste json string into template file

    Parses ``inputs`` with input_to_dict(), computes the regions of the
    diagram, writes them as text with write_text_venn(), then copies
    jvenn_template.html (read from CURRENT_DIR) to ``summary_file`` with
    every "###JVENN_DATA###" placeholder replaced by the jvenn options
    serialised as JSON.
    """
    comp_dict, title_dict = input_to_dict(inputs)
    data = diagram(comp_dict, title_dict)
    write_text_venn(data)

    to_replace = {
        "series": [data],
        "displayStat": "true",
        "displaySwitch": "true",
        "shortNumber": "true",
    }
    # NOTE(review): the template is not visible from here; presumably the
    # placeholder sits inside a <script> element, where a "</script>" found
    # in a list name or an id would end the script early. "\/" is a valid
    # JSON escape of "/", so the data itself is unchanged.
    jvenn_data = json.dumps(to_replace).replace("</", "<\\/")

    template = os.path.join(CURRENT_DIR, "jvenn_template.html")
    # "with" closes both files even when reading or writing fails
    with open(template) as FH_summary_tpl:
        with open(summary_file, "w") as FH_summary_out:
            for line in FH_summary_tpl:
                if "###JVENN_DATA###" in line:
                    line = line.replace("###JVENN_DATA###", jvenn_data)
                FH_summary_out.write(line)
7
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
184
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
185
0
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
def process(args):
    """
    Run the tool from the parsed command line: ``args.summary`` is the
    path of the HTML report to write and ``args.input`` the lists to
    compare
    """
    summary_file, inputs = args.summary, args.input
    write_summary(summary_file, inputs)
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
188
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
189
7
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
190 #####################################################################
0
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
191 # MAIN
7
087011d9e3aa "planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
proteore
parents: 6
diff changeset
192 #####################################################################
0
8d61f5ca50a4 planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
proteore
parents:
diff changeset
if __name__ == '__main__':
    # Parse parameters
    parser = argparse.ArgumentParser(
        description='Compare lists of ids and draw their Venn diagram')
    group_input = parser.add_argument_group('Inputs')
    # Each --input collects the values describing one list (nargs="+") and
    # is appended to args.input as one entry, as read by input_to_dict()
    group_input.add_argument('--input', nargs="+", action="append",
                             required=True,
                             help="A list to compare: a tabular file or "
                                  "the ids themselves, then its name, its "
                                  "type and, for a file, whether it has a "
                                  "header and the column of the ids. "
                                  "Repeat the option for each list.")
    group_output = parser.add_argument_group('Outputs')
    group_output.add_argument('--summary', default="summary.html",
                              help="The HTML file containing the graphs. "
                                   "[Default: %(default)s]")
    args = parser.parse_args()

    # Process
    process(args)