Mercurial > repos > proteore > proteore_venn_diagram
comparison venn_diagram.py @ 0:8d61f5ca50a4 draft
planemo upload commit 9760cde192a15cdf3d2dbec05dd867eaa0392bcd-dirty
| author | proteore |
|---|---|
| date | Mon, 12 Nov 2018 11:26:40 -0500 |
| parents | |
| children | b17f0fbbd6c9 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:8d61f5ca50a4 |
|---|---|
| 1 #!/usr/bin/env python2.7 | |
| 2 | |
| 3 import os | |
| 4 import sys | |
| 5 import json | |
| 6 import operator | |
| 7 import argparse | |
| 8 import re | |
| 9 from itertools import combinations | |
| 10 | |
| 11 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) | |
| 12 | |
| 13 ################################################################################################################################################## | |
| 14 # FUNCTIONS | |
| 15 ################################################################################################################################################## | |
| 16 | |
def isnumber(format, n):
    """
    Check whether the string ``n`` is written as an integer or as a float.

    Parameters:
        format: "int" or "float". Any other value makes the check fail.
        n: the string to test.

    Returns:
        True if ``n`` matches the requested numeric format, False otherwise.
    """
    # Integer: optional minus sign, then digits without a leading zero
    # (so "0" is not accepted).
    int_format = re.compile(r"^[-]?[1-9][0-9]*$")
    # Float: integer part (a lone "0" is allowed) and an optional fractional
    # part. The dot is escaped so that only a real decimal point matches;
    # left unescaped it would accept any character, e.g. "1a5".
    float_format = re.compile(r"^[-]?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?$")

    if format == "int":
        pattern = int_format
    elif format == "float":
        pattern = float_format
    else:
        # Unknown format name: nothing to match against
        return False
    return pattern.match(n) is not None
| 32 | |
def input_to_dict(inputs):
    """
    Parse input and return a dictionary of name and data of each lists/files

    Parameters:
        inputs: a list with one entry per list to compare. Entries are
            labelled "A" to "F" in order, so at most six are supported.
            Each entry is a sequence of strings:
              - [content, name, input_type] for pasted IDs, where
                ``content`` holds whitespace-separated IDs;
              - [path, name, "file", header, ncol] for a tab-separated
                file, where ``header`` is "true" when the first line must
                be skipped and ``ncol`` is the column number ("c1" or "1").

    Returns:
        (comp_dict, title_dict): ``comp_dict`` maps each label to the set
        of IDs of that list and ``title_dict`` maps each label to its name.

    Raises:
        ValueError: if the column number of a file input is malformed.
        IndexError: if more than six inputs are given, or if a non-blank
            line of a file has fewer columns than ``ncol``.
    """
    labels = ["A", "B", "C", "D", "E", "F"]
    comp_dict = {}
    title_dict = {}
    for i, entry in enumerate(inputs):
        label = labels[i]
        title_dict[label] = entry[1]

        if entry[2] == "file":
            header = entry[3]
            column = entry[4].replace("c", "")

            # Check if column number is in right form
            if not isnumber("int", column):
                raise ValueError("Please fill in the right format of column number")
            index = int(column) - 1

            # The context manager closes the file even if reading fails
            with open(entry[0], "r") as handle:
                rows = handle.readlines()
            if header == "true":
                rows = rows[1:]

            ids = set()
            for row in rows:
                # Blank lines (e.g. a trailing empty line) carry no ID
                if not row.strip():
                    continue
                # A cell may hold several ";"-separated IDs: take only the first
                first_id = row.split("\t")[index].split(";")[0].strip()
                # An empty cell is not an ID and must not be counted
                if first_id:
                    ids.add(first_id)
        else:
            ids = set(entry[0].split())

        comp_dict[label] = ids

    return comp_dict, title_dict
| 68 | |
def intersect(comp_dict):
    """
    Calculate the intersections of input

    Parameters:
        comp_dict: dictionary mapping each list label to its set of IDs.
            IDs must be mutually sortable (they are strings in this script).

    Yields:
        One (group, intersected, difference) tuple for every non-empty
        combination of labels, from the smallest combinations to the one
        holding all labels:
          - group: tuple of the labels of the combination, sorted;
          - intersected: sorted list of the IDs found in every list of the
            group;
          - difference: sorted list of the IDs of ``intersected`` that are
            found in none of the lists outside the group.
    """
    labels = sorted(comp_dict)
    for size in range(1, len(labels) + 1):
        for group in combinations(labels, size):
            others = [label for label in labels if label not in group]
            intersected = set.intersection(*(comp_dict[k] for k in group))
            if others:
                difference = intersected.difference(*(comp_dict[k] for k in others))
            else:
                # No list is left out of the group, so nothing is excluded
                difference = intersected
            # Sorted so that the output does not depend on set ordering
            yield group, sorted(intersected), sorted(difference)
| 83 | |
def diagram(comp_dict, title_dict):
    """
    Create json string for jvenn diagram plot

    Parameters:
        comp_dict: dictionary mapping each list label to its set of IDs.
        title_dict: dictionary mapping each list label to its display name.

    Returns:
        A dictionary with three entries:
          - "name": label -> display name, for every label of ``comp_dict``;
          - "data": region key -> list of IDs, where the key is the
            concatenation of the labels of the region (e.g. "AB");
          - "values": region key -> number of IDs in that region.
    """
    result = {
        "name": dict((label, title_dict[label]) for label in comp_dict),
        "data": {},
        "values": {},
    }

    total = len(comp_dict)
    for group, intersected, difference in intersect(comp_dict):
        key = "".join(group)
        # The region covering every list is the plain intersection; this is
        # tested first so that a single input list is not reported as empty.
        # Every other region only keeps the IDs absent from the other lists.
        if len(group) == total:
            members = intersected
        else:
            members = difference
        result["data"][key] = members
        result["values"][key] = len(members)

    return result
| 107 | |
def write_text_venn(json_result):
    """
    Write intersections of input to text output file

    The file "venn_diagram_text_output.txt" is written in the current
    working directory as a tab-separated table: one column per non-empty
    region, headed by the names of its lists joined with "_", with the IDs
    of the region listed below. Columns are ordered by number of lists,
    then by label. Nothing is written to the file when every region is
    empty.

    Parameters:
        json_result: dictionary holding a "name" entry (label -> display
            name) and a "data" entry (region key -> list of IDs), where
            each character of a region key is a label.
    """
    data = json_result["data"]
    names = json_result["name"]

    # Keep only the non-empty regions, in a reproducible order
    regions = sorted(
        (key for key, ids in data.items() if ids),
        key=lambda key: (len(key), key)
    )

    content = ""
    if regions:
        header = "\t".join(
            "_".join(names[label] for label in key) for key in regions
        )
        rows = []
        for i in range(max(len(data[key]) for key in regions)):
            cells = [data[key][i] if i < len(data[key]) else "" for key in regions]
            # Strip the tabs left at the end of the line by empty cells
            rows.append("\t".join(cells).rstrip())
        content = header.rstrip() + "\n" + "\n".join(rows)

    with open("venn_diagram_text_output.txt", "w") as output:
        output.write(content)
| 138 | |
def write_summary(summary_file, inputs):
    """
    Paste json string into template file

    Builds the diagram data from ``inputs``, writes the text output through
    write_text_venn, then copies "jvenn_template.html" (read from the
    directory of this script) to ``summary_file``, replacing the
    "###JVENN_DATA###" placeholder by the JSON-encoded diagram settings.

    Parameters:
        summary_file: path of the HTML file to write.
        inputs: description of the lists to compare, as expected by
            input_to_dict.
    """
    comp_dict, title_dict = input_to_dict(inputs)
    data = diagram(comp_dict, title_dict)
    write_text_venn(data)

    to_replace = {
        "series": [data],
        "displayStat": "true",
        "displaySwitch": "true",
        "shortNumber": "true",
    }
    # Serialized once, whatever the number of placeholders in the template.
    # NOTE(review): the JSON is pasted into the HTML as is; confirm that the
    # template context is safe for IDs containing markup such as "</script>".
    json_data = json.dumps(to_replace)

    template_path = os.path.join(CURRENT_DIR, "jvenn_template.html")
    # Both files are closed even if reading or writing fails
    with open(template_path) as FH_summary_tpl:
        with open(summary_file, "w") as FH_summary_out:
            for line in FH_summary_tpl:
                if "###JVENN_DATA###" in line:
                    line = line.replace("###JVENN_DATA###", json_data)
                FH_summary_out.write(line)
| 163 | |
def process(args):
    """
    Forward the parsed command-line arguments to write_summary

    ``args`` must provide the ``summary`` and ``input`` attributes.
    """
    summary_path = args.summary
    input_entries = args.input
    write_summary(summary_path, input_entries)
| 166 | |
| 167 | |
| 168 ################################################################################################################################################## | |
| 169 # MAIN | |
| 170 ################################################################################################################################################## | |
if __name__ == '__main__':
    # Parse parameters
    parser = argparse.ArgumentParser(description='Compare lists of IDs and build their Venn diagram')
    group_input = parser.add_argument_group( 'Inputs' )
    # Each --input occurrence collects the values describing one list, so
    # args.input is a list of lists (one per list to compare).
    group_input.add_argument('--input', nargs="+", action="append", required=True, help="One list to compare: file path or IDs, name, type, then header flag and column number for a file. Repeat the option for each list.")
    group_output = parser.add_argument_group( 'Outputs' )
    group_output.add_argument('--summary', default="summary.html", help="The HTML file containing the graphs. [Default: %(default)s]")
    args = parser.parse_args()

    # Process
    process( args )
