Mercurial > repos > proteore > proteore_venn_diagram
comparison venn_diagram.py @ 7:087011d9e3aa draft
"planemo upload commit 4612cde808ac08a368cca0b0f49358001e23eb1d-dirty"
| author | proteore |
|---|---|
| date | Tue, 11 May 2021 14:59:54 +0000 |
| parents | 95bbe3a6b09f |
| children | 415ec6611b1d |
comparison
Legend: equal, deleted, inserted, replaced
| 6:95bbe3a6b09f | 7:087011d9e3aa |
|---|---|
| 1 #!/usr/bin/env python2.7 | 1 #!/usr/bin/env python |
| 2 | 2 |
| 3 import argparse | |
| 4 import csv | |
| 5 import json | |
| 3 import os | 6 import os |
| 4 import sys | 7 import sys # noqa 401 |
| 5 import json | 8 import operator # noqa 401 |
| 6 import operator | 9 import re |
| 7 import argparse | |
| 8 import re, csv | |
| 9 from itertools import combinations | 10 from itertools import combinations |
| 10 | 11 |
| 11 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) | 12 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) |
| 12 | 13 |
| 13 ################################################################################################################################################## | 14 ######################################################################## |
| 14 # FUNCTIONS | 15 # FUNCTIONS |
| 15 ################################################################################################################################################## | 16 ######################################################################## |
| 16 | 17 |
| 18 | |
| 17 def isnumber(format, n): | 19 def isnumber(format, n): |
| 18 """ | 20 """ |
| 19 Check if an element is integer or float | 21 Check if an element is integer or float |
| 20 """ | 22 """ |
| 21 float_format = re.compile(r"^[-]?[1-9][0-9]*.?[0-9]+$") | 23 float_format = re.compile(r"^[-]?[1-9][0-9]*.?[0-9]+$") |
| 27 test = re.match(float_format, n) | 29 test = re.match(float_format, n) |
| 28 if test: | 30 if test: |
| 29 return True | 31 return True |
| 30 else: | 32 else: |
| 31 return False | 33 return False |
| 32 | 34 |
| 35 | |
| 33 def input_to_dict(inputs): | 36 def input_to_dict(inputs): |
| 34 """ | 37 """ |
| 35 Parse input and return a dictionary of name and data of each lists/files | 38 Parse input and return a dictionary of name and data of each lists/files |
| 36 """ | 39 """ |
| 37 comp_dict = {} | 40 comp_dict = {} |
| 38 title_dict = {} | 41 title_dict = {} |
| 39 c = ["A", "B", "C", "D", "E", "F"] | 42 c = ["A", "B", "C", "D", "E", "F"] |
| 40 for i in range(len(inputs)): | 43 for i in range(len(inputs)): |
| 41 input_file = inputs[i][0] | 44 input_file = inputs[i][0] |
| 42 name = inputs[i][1] | 45 name = inputs[i][1] |
| 43 input_type = inputs[i][2] | 46 input_type = inputs[i][2] |
| 44 title = c[i] | 47 title = c[i] |
| 45 title_dict[title] = name | 48 title_dict[title] = name |
| 46 ids = set() | 49 ids = set() |
| 47 if input_type == "file": | 50 if input_type == "file": |
| 48 header = inputs[i][3] | 51 header = inputs[i][3] |
| 49 ncol = inputs[i][4] | 52 ncol = inputs[i][4] |
| 50 with open(input_file,"r") as handle : | 53 with open(input_file, "r") as handle: |
| 51 file_content = csv.reader(handle,delimiter="\t") | 54 file_content = csv.reader(handle, delimiter="\t") |
| 52 file_content = list(file_content) #csv object to list | 55 file_content = list(file_content) # csv object to list |
| 53 | 56 |
| 54 # Check if column number is in right form | 57 # Check if column number is in right form |
| 55 if isnumber("int", ncol.replace("c", "")): | 58 if isnumber("int", ncol.replace("c", "")): |
| 56 if header == "true": | 59 if header == "true": |
| 57 file_content = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content[1:]]] # gets ids from defined column | 60 # gets ids from defined column |
| 61 file_content = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content[1:]]] # noqa 501 | |
| 62 | |
| 58 else: | 63 else: |
| 59 file_content = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content]] | 64 file_content = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content]] # noqa 501 |
| 60 else: | 65 else: |
| 61 raise ValueError("Please fill in the right format of column number") | 66 raise ValueError("Please fill in the right format of column number") # noqa 501 |
| 62 else: | 67 else: |
| 63 ids = set() | 68 ids = set() |
| 64 file_content = inputs[i][0].split() | 69 file_content = inputs[i][0].split() |
| 65 file_content = [x.split(";") for x in file_content] | 70 file_content = [x.split(";") for x in file_content] |
| 66 | 71 |
| 67 file_content = [item.strip() for sublist in file_content for item in sublist if item != ''] #flat list of list of lists, remove empty items | 72 # flat list of list of lists, remove empty items |
| 73 file_content = [item.strip() for sublist in file_content for item in sublist if item != ''] # noqa 501 | |
| 74 | |
| 68 ids.update(file_content) | 75 ids.update(file_content) |
| 69 if 'NA' in ids : ids.remove('NA') | 76 if 'NA' in ids: |
| 77 ids.remove('NA') | |
| 70 comp_dict[title] = ids | 78 comp_dict[title] = ids |
| 71 | 79 |
| 72 return comp_dict, title_dict | 80 return comp_dict, title_dict |
| 73 | 81 |
| 82 | |
| 74 def intersect(comp_dict): | 83 def intersect(comp_dict): |
| 75 """ | 84 """ |
| 76 Calculate the intersections of input | 85 Calculate the intersections of input |
| 77 """ | 86 """ |
| 78 names = set(comp_dict) | 87 names = set(comp_dict) |
| 81 others = set() | 90 others = set() |
| 82 [others.add(name) for name in names if name not in group] | 91 [others.add(name) for name in names if name not in group] |
| 83 difference = [] | 92 difference = [] |
| 84 intersected = set.intersection(*(comp_dict[k] for k in group)) | 93 intersected = set.intersection(*(comp_dict[k] for k in group)) |
| 85 if len(others) > 0: | 94 if len(others) > 0: |
| 86 difference = intersected.difference(set.union(*(comp_dict[k] for k in others))) | 95 difference = intersected.difference(set.union(*(comp_dict[k] for k in others))) # noqa 501 |
| 87 yield group, list(intersected), list(difference) | 96 yield group, list(intersected), list(difference) |
| 97 | |
| 88 | 98 |
| 89 def diagram(comp_dict, title_dict): | 99 def diagram(comp_dict, title_dict): |
| 90 """ | 100 """ |
| 91 Create json string for jvenn diagram plot | 101 Create json string for jvenn diagram plot |
| 92 """ | 102 """ |
| 93 result = {} | 103 result = {} |
| 94 result["name"] = {} | 104 result["name"] = {} |
| 95 for k in comp_dict.keys(): | 105 for k in comp_dict.keys(): |
| 96 result["name"][k] = title_dict[k] | 106 result["name"][k] = title_dict[k] |
| 97 | 107 |
| 98 result["data"] = {} | 108 result["data"] = {} |
| 99 result["values"] = {} | 109 result["values"] = {} |
| 100 for group, intersected, difference in intersect(comp_dict): | 110 for group, intersected, difference in intersect(comp_dict): |
| 101 if len(group) == 1: | 111 if len(group) == 1: |
| 102 result["data"]["".join(group)] = difference | 112 result["data"]["".join(group)] = difference |
| 103 result["values"]["".join(group)] = len(difference) | 113 result["values"]["".join(group)] = len(difference) |
| 104 elif len(group) > 1 and len(group) < len(comp_dict): | 114 elif len(group) > 1 and len(group) < len(comp_dict): |
| 105 result["data"]["".join(group)] = difference | 115 result["data"]["".join(group)] = difference |
| 106 result["values"]["".join(group)] = len(difference) | 116 result["values"]["".join(group)] = len(difference) |
| 107 elif len(group) == len(comp_dict): | 117 elif len(group) == len(comp_dict): |
| 108 result["data"]["".join(group)] = intersected | 118 result["data"]["".join(group)] = intersected |
| 109 result["values"]["".join(group)] = len(intersected) | 119 result["values"]["".join(group)] = len(intersected) |
| 110 | 120 |
| 111 return result | 121 return result |
| 112 | 122 |
| 113 #Write intersections of input to text output file | 123 # Write intersections of input to text output file |
| 124 | |
| 125 | |
| 114 def write_text_venn(json_result): | 126 def write_text_venn(json_result): |
| 115 lines = [] | 127 lines = [] |
| 116 result = dict((k, v) for k, v in json_result["data"].iteritems() if v != []) | 128 result = dict((k, v) for k, v in json_result["data"].items() if v != []) # noqa 501 |
| 117 for key in result : | 129 for key in result: |
| 118 if 'NA' in result[key] : result[key].remove("NA") | 130 if 'NA' in result[key]: |
| 119 list_names = dict((k, v) for k, v in json_result["name"].iteritems() if v != []) | 131 result[key].remove("NA") |
| 132 | |
| 133 list_names = dict((k, v) for k, v in json_result["name"].items() if v != []) # noqa 501 | |
| 120 nb_lines_max = max(len(v) for v in result.values()) | 134 nb_lines_max = max(len(v) for v in result.values()) |
| 121 | 135 |
| 122 #get list names associated to each column | 136 # get list names associated to each column |
| 123 column_dict = {} | 137 column_dict = {} |
| 124 for key in result : | 138 for key in result: |
| 125 if key in list_names : | 139 if key in list_names: |
| 126 column_dict[key] = list_names[key] | 140 column_dict[key] = list_names[key] |
| 127 else : | 141 else: |
| 128 keys= list(key) | 142 keys = list(key) |
| 129 column_dict[key] = "_".join([list_names[k] for k in keys]) | 143 column_dict[key] = "_".join([list_names[k] for k in keys]) |
| 130 | 144 |
| 131 #construct tsv | 145 # construct tsv |
| 132 for key in result : | 146 for key in result: |
| 133 line = result[key] | 147 line = result[key] |
| 134 if len(line) < nb_lines_max : | 148 if len(line) < nb_lines_max: |
| 135 line.extend(['NA']*(nb_lines_max-len(line))) | 149 line.extend(['']*(nb_lines_max-len(line))) |
| 136 line = [column_dict[key]] + line #add header | 150 line = [column_dict[key]] + line # add header |
| 137 lines.append(line) | 151 lines.append(line) |
| 138 #transpose tsv | 152 # transpose tsv |
| 139 lines=zip(*lines) | 153 lines = zip(*lines) |
| 140 | 154 |
| 141 with open("venn_diagram_text_output.tsv", "w") as output: | 155 with open("venn_diagram_text_output.tsv", "w") as output: |
| 142 tsv_output = csv.writer(output, delimiter='\t') | 156 tsv_output = csv.writer(output, delimiter='\t') |
| 143 tsv_output.writerows(lines) | 157 tsv_output.writerows(lines) |
| 144 | 158 |
| 159 | |
| 145 def write_summary(summary_file, inputs): | 160 def write_summary(summary_file, inputs): |
| 146 """ | 161 """ |
| 147 Paste json string into template file | 162 Paste json string into template file |
| 148 """ | 163 """ |
| 149 a, b = input_to_dict(inputs) | 164 a, b = input_to_dict(inputs) |
| 150 data = diagram(a, b) | 165 data = diagram(a, b) |
| 151 write_text_venn(data) | 166 write_text_venn(data) |
| 152 | 167 |
| 153 to_replace = { | 168 to_replace = { |
| 154 "series": [data], | 169 "series": [data], |
| 155 "displayStat": "true", | 170 "displayStat": "true", |
| 156 "displaySwitch": "true", | 171 "displaySwitch": "true", |
| 157 "shortNumber": "true", | 172 "shortNumber": "true", |
| 158 } | 173 } |
| 159 | 174 |
| 160 FH_summary_tpl = open(os.path.join(CURRENT_DIR, "jvenn_template.html")) | 175 FH_summary_tpl = open(os.path.join(CURRENT_DIR, "jvenn_template.html")) |
| 161 FH_summary_out = open(summary_file, "w" ) | 176 FH_summary_out = open(summary_file, "w") |
| 162 for line in FH_summary_tpl: | 177 for line in FH_summary_tpl: |
| 163 if "###JVENN_DATA###" in line: | 178 if "###JVENN_DATA###" in line: |
| 164 line = line.replace("###JVENN_DATA###", json.dumps(to_replace)) | 179 line = line.replace("###JVENN_DATA###", json.dumps(to_replace)) |
| 165 FH_summary_out.write(line) | 180 FH_summary_out.write(line) |
| 166 | 181 |
| 167 FH_summary_out.close() | 182 FH_summary_out.close() |
| 168 FH_summary_tpl.close() | 183 FH_summary_tpl.close() |
| 169 | 184 |
| 185 | |
| 170 def process(args): | 186 def process(args): |
| 171 write_summary(args.summary, args.input) | 187 write_summary(args.summary, args.input) |
| 172 | 188 |
| 173 | 189 |
| 174 ################################################################################################################################################## | 190 ##################################################################### |
| 175 # MAIN | 191 # MAIN |
| 176 ################################################################################################################################################## | 192 ##################################################################### |
| 177 if __name__ == '__main__': | 193 if __name__ == '__main__': |
| 178 # Parse parameters | 194 # Parse parameters |
| 179 parser = argparse.ArgumentParser(description='Filters an abundance file') | 195 parser = argparse.ArgumentParser(description='Filters an abundance file') |
| 180 group_input = parser.add_argument_group( 'Inputs' ) | 196 group_input = parser.add_argument_group('Inputs') |
| 181 group_input.add_argument('--input', nargs="+", action="append", required=True, help="The input tabular file.") | 197 group_input.add_argument('--input', nargs="+", action="append", |
| 182 group_output = parser.add_argument_group( 'Outputs' ) | 198 required=True, help="The input tabular file.") |
| 183 group_output.add_argument('--summary', default="summary.html", help="The HTML file containing the graphs. [Default: %(default)s]") | 199 group_output = parser.add_argument_group('Outputs') |
| 200 group_output.add_argument('--summary', default="summary.html", | |
| 201 help="The HTML file containing the graphs. \ | |
| 202 [Default: %(default)s]") | |
| 184 args = parser.parse_args() | 203 args = parser.parse_args() |
| 185 | 204 |
| 186 # Process | 205 # Process |
| 187 process( args ) | 206 process(args) |
