Mercurial > repos > bcclaywell > argo_navis
comparison bin/ess.py @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author | bcclaywell |
---|---|
date | Mon, 12 Oct 2015 17:43:33 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d67268158946 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import csv | |
4 import numpy | |
5 from biopy import bayesianStats as bs | |
6 import argparse | |
7 import re | |
8 | |
9 | |
10 commented_regex = re.compile("^\s*\#.*") | |
11 | |
12 doc_template = """ | |
13 <html> | |
14 <body> | |
15 <h1>Effective Sample Size Results</h1> | |
16 <p>Recommendation: {status}</p> | |
17 | |
18 <h2>Explanation</h2> | |
19 <p> | |
20 Ideally, you want all these values to be greater than or equal to 200. | |
21 If any of them aren't, it's probably a good idea to perform a Resume run with BEAST. | |
22 On average, running 3x as long will increase your ESS by 3. | |
23 This should help give you some sense of how much longer you should run BEAST. | |
24 </p> | |
25 <p> | |
26 Note that while ESS statistics are valuable for getting a rough sense of whether to run or not, it is | |
27 <em>always</em> recommended that you manually look at your logfile traces using | |
28 <a href="http://tree.bio.ed.ac.uk/software/tracer/">Tracer</a>, or another trace analysis software before | |
29 accepting your analysis as "final". | |
30 </p> | |
31 | |
32 <h2>Detailed Results</h2> | |
33 <table> | |
34 <tr><th>Parameter</th><th>ESS</th></tr> | |
35 {table_content} | |
36 </table> | |
37 </body> | |
38 </html> | |
39 """ | |
40 | |
41 tr_template = "<tr><td>{parameter}</td><td {style}>{ess}</td></tr>" | |
42 | |
43 | |
44 def format_tr(result, threshold=200): | |
45 "Takes a result [param, ess] pair and makes the appropriate tr html" | |
46 param, ess = result | |
47 passes = ess >= threshold | |
48 style = "" if passes else 'style="color: red;"' | |
49 return tr_template.format(parameter=param, ess=ess, style=style) | |
50 | |
51 | |
52 def dict_reader_to_cols(dict_reader): | |
53 d = dict() | |
54 for row in dict_reader: | |
55 for k, v in row.iteritems(): | |
56 try: | |
57 v = float(v) | |
58 try: | |
59 d[k].append(v) | |
60 except KeyError: | |
61 d[k] = [v] | |
62 except ValueError: | |
63 if k == "" and v == "": | |
64 # Don't worry about this, there is a tailing \t on each line | |
65 pass | |
66 else: | |
67 # Otherwise... something weird is happening | |
68 print "Could not cast to float for kv pair:", k, v | |
69 return d | |
70 | |
71 | |
72 def table_contents(results, threshold=200): | |
73 trs = [format_tr(result, threshold) for result in sorted(results)] | |
74 return "\n".join(trs) | |
75 | |
76 | |
77 def html_contents(results, threshold): | |
78 table_content = table_contents(results, threshold) | |
79 within_tol = [x[1] > threshold for x in results] | |
80 ess_good = all(within_tol) | |
81 status = "Good to go" if all(within_tol) else "Should run longer" | |
82 return doc_template.format(status=status, table_content=table_content) | |
83 | |
84 | |
85 def get_args(): | |
86 parser = argparse.ArgumentParser() | |
87 parser.add_argument('logfile', type=argparse.FileType('r')) | |
88 parser.add_argument('ess_out', type=argparse.FileType('w')) | |
89 parser.add_argument('-t', '--threshold', type=float, default=200, | |
90 help="""For formatting html, what's the minimal ESS score to be considered high enough? | |
91 (default: 200""") | |
92 parser.add_argument('--html-out', action="store_true", | |
93 help="Default is CSV output") | |
94 return parser.parse_args() | |
95 | |
96 | |
97 def main(args): | |
98 non_comment_lines = (row for row in args.logfile if not commented_regex.match(row)) | |
99 reader = csv.DictReader(non_comment_lines, delimiter="\t") | |
100 data_columns = dict_reader_to_cols(reader) | |
101 results = [] | |
102 for colname, data in data_columns.iteritems(): | |
103 if colname != "Sample": | |
104 results.append([colname, bs.effectiveSampleSize(data)]) | |
105 | |
106 if args.html_out: | |
107 html_content = html_contents(results, args.threshold) | |
108 args.ess_out.write(html_content) | |
109 else: | |
110 writer = csv.writer(args.ess_out) | |
111 writer.writerow(["statistic", "ess"]) | |
112 for result in results: | |
113 writer.writerow(result) | |
114 | |
115 args.logfile.close() | |
116 args.ess_out.close() | |
117 | |
118 | |
119 if __name__ == '__main__': | |
120 main(get_args()) | |
121 | |
122 |