Mercurial > repos > bcclaywell > argo_navis
comparison bin/ess.py @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
| author | bcclaywell |
|---|---|
| date | Mon, 12 Oct 2015 17:43:33 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d67268158946 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import csv | |
| 4 import numpy | |
| 5 from biopy import bayesianStats as bs | |
| 6 import argparse | |
| 7 import re | |
| 8 | |
| 9 | |
| 10 commented_regex = re.compile("^\s*\#.*") | |
| 11 | |
| 12 doc_template = """ | |
| 13 <html> | |
| 14 <body> | |
| 15 <h1>Effective Sample Size Results</h1> | |
| 16 <p>Recommendation: {status}</p> | |
| 17 | |
| 18 <h2>Explanation</h2> | |
| 19 <p> | |
| 20 Ideally, you want all these values to be greater than or equal to 200. | |
| 21 If any of them aren't, it's probably a good idea to perform a Resume run with BEAST. | |
| 22 On average, running 3x as long will increase your ESS by 3. | |
| 23 This should help give you some sense of how much longer you should run BEAST. | |
| 24 </p> | |
| 25 <p> | |
| 26 Note that while ESS statistics are valuable for getting a rough sense of whether to run or not, it is | |
| 27 <em>always</em> recommended that you manually look at your logfile traces using | |
| 28 <a href="http://tree.bio.ed.ac.uk/software/tracer/">Tracer</a>, or another trace analysis software before | |
| 29 accepting your analysis as "final". | |
| 30 </p> | |
| 31 | |
| 32 <h2>Detailed Results</h2> | |
| 33 <table> | |
| 34 <tr><th>Parameter</th><th>ESS</th></tr> | |
| 35 {table_content} | |
| 36 </table> | |
| 37 </body> | |
| 38 </html> | |
| 39 """ | |
| 40 | |
| 41 tr_template = "<tr><td>{parameter}</td><td {style}>{ess}</td></tr>" | |
| 42 | |
| 43 | |
| 44 def format_tr(result, threshold=200): | |
| 45 "Takes a result [param, ess] pair and makes the appropriate tr html" | |
| 46 param, ess = result | |
| 47 passes = ess >= threshold | |
| 48 style = "" if passes else 'style="color: red;"' | |
| 49 return tr_template.format(parameter=param, ess=ess, style=style) | |
| 50 | |
| 51 | |
| 52 def dict_reader_to_cols(dict_reader): | |
| 53 d = dict() | |
| 54 for row in dict_reader: | |
| 55 for k, v in row.iteritems(): | |
| 56 try: | |
| 57 v = float(v) | |
| 58 try: | |
| 59 d[k].append(v) | |
| 60 except KeyError: | |
| 61 d[k] = [v] | |
| 62 except ValueError: | |
| 63 if k == "" and v == "": | |
| 64 # Don't worry about this, there is a tailing \t on each line | |
| 65 pass | |
| 66 else: | |
| 67 # Otherwise... something weird is happening | |
| 68 print "Could not cast to float for kv pair:", k, v | |
| 69 return d | |
| 70 | |
| 71 | |
| 72 def table_contents(results, threshold=200): | |
| 73 trs = [format_tr(result, threshold) for result in sorted(results)] | |
| 74 return "\n".join(trs) | |
| 75 | |
| 76 | |
| 77 def html_contents(results, threshold): | |
| 78 table_content = table_contents(results, threshold) | |
| 79 within_tol = [x[1] > threshold for x in results] | |
| 80 ess_good = all(within_tol) | |
| 81 status = "Good to go" if all(within_tol) else "Should run longer" | |
| 82 return doc_template.format(status=status, table_content=table_content) | |
| 83 | |
| 84 | |
| 85 def get_args(): | |
| 86 parser = argparse.ArgumentParser() | |
| 87 parser.add_argument('logfile', type=argparse.FileType('r')) | |
| 88 parser.add_argument('ess_out', type=argparse.FileType('w')) | |
| 89 parser.add_argument('-t', '--threshold', type=float, default=200, | |
| 90 help="""For formatting html, what's the minimal ESS score to be considered high enough? | |
| 91 (default: 200""") | |
| 92 parser.add_argument('--html-out', action="store_true", | |
| 93 help="Default is CSV output") | |
| 94 return parser.parse_args() | |
| 95 | |
| 96 | |
| 97 def main(args): | |
| 98 non_comment_lines = (row for row in args.logfile if not commented_regex.match(row)) | |
| 99 reader = csv.DictReader(non_comment_lines, delimiter="\t") | |
| 100 data_columns = dict_reader_to_cols(reader) | |
| 101 results = [] | |
| 102 for colname, data in data_columns.iteritems(): | |
| 103 if colname != "Sample": | |
| 104 results.append([colname, bs.effectiveSampleSize(data)]) | |
| 105 | |
| 106 if args.html_out: | |
| 107 html_content = html_contents(results, args.threshold) | |
| 108 args.ess_out.write(html_content) | |
| 109 else: | |
| 110 writer = csv.writer(args.ess_out) | |
| 111 writer.writerow(["statistic", "ess"]) | |
| 112 for result in results: | |
| 113 writer.writerow(result) | |
| 114 | |
| 115 args.logfile.close() | |
| 116 args.ess_out.close() | |
| 117 | |
| 118 | |
| 119 if __name__ == '__main__': | |
| 120 main(get_args()) | |
| 121 | |
| 122 |
