argo_navis: bin/ess.py comparison

comparison bin/ess.py @ 0:d67268158946 draft

planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b

author	bcclaywell
date	Mon, 12 Oct 2015 17:43:33 -0400
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:d67268158946
+#!/usr/bin/env python
+import csv
+import numpy
+from biopy import bayesianStats as bs
+import argparse
+import re
+commented_regex = re.compile("^\s*\#.*")
+doc_template = """
+<html>
+<body>
+<h1>Effective Sample Size Results</h1>
+<p>Recommendation: {status}</p>
+<h2>Explanation</h2>
+<p>
+Ideally, you want all these values to be greater than or equal to 200.
+If any of them aren't, it's probably a good idea to perform a Resume run with BEAST.
+On average, running 3x as long will increase your ESS by 3.
+This should help give you some sense of how much longer you should run BEAST.
+</p>
+<p>
+Note that while ESS statistics are valuable for getting a rough sense of whether to run or not, it is
+<em>always</em> recommended that you manually look at your logfile traces using
+<a href="http://tree.bio.ed.ac.uk/software/tracer/">Tracer</a>, or another trace analysis software before
+accepting your analysis as "final".
+</p>
+<h2>Detailed Results</h2>
+<table>
+<tr><th>Parameter</th><th>ESS</th></tr>
+{table_content}
+</table>
+</body>
+</html>
+"""
+tr_template = "<tr><td>{parameter}</td><td {style}>{ess}</td></tr>"
+def format_tr(result, threshold=200):
+"Takes a result [param, ess] pair and makes the appropriate tr html"
+param, ess = result
+passes = ess >= threshold
+style = "" if passes else 'style="color: red;"'
+return tr_template.format(parameter=param, ess=ess, style=style)
+def dict_reader_to_cols(dict_reader):
+d = dict()
+for row in dict_reader:
+for k, v in row.iteritems():
+try:
+v = float(v)
+try:
+d[k].append(v)
+except KeyError:
+d[k] = [v]
+except ValueError:
+if k == "" and v == "":
+# Don't worry about this, there is a tailing \t on each line
+pass
+else:
+# Otherwise... something weird is happening
+print "Could not cast to float for kv pair:", k, v
+return d
+def table_contents(results, threshold=200):
+trs = [format_tr(result, threshold) for result in sorted(results)]
+return "\n".join(trs)
+def html_contents(results, threshold):
+table_content = table_contents(results, threshold)
+within_tol = [x[1] > threshold for x in results]
+ess_good = all(within_tol)
+status = "Good to go" if all(within_tol) else "Should run longer"
+return doc_template.format(status=status, table_content=table_content)
+def get_args():
+parser = argparse.ArgumentParser()
+parser.add_argument('logfile', type=argparse.FileType('r'))
+parser.add_argument('ess_out', type=argparse.FileType('w'))
+parser.add_argument('-t', '--threshold', type=float, default=200,
+help="""For formatting html, what's the minimal ESS score to be considered high enough?
+(default: 200""")
+parser.add_argument('--html-out', action="store_true",
+help="Default is CSV output")
+return parser.parse_args()
+def main(args):
+non_comment_lines = (row for row in args.logfile if not commented_regex.match(row))
+reader = csv.DictReader(non_comment_lines, delimiter="\t")
+data_columns = dict_reader_to_cols(reader)
+results = []
+for colname, data in data_columns.iteritems():
+if colname != "Sample":
+results.append([colname, bs.effectiveSampleSize(data)])
+if args.html_out:
+html_content = html_contents(results, args.threshold)
+args.ess_out.write(html_content)
+else:
+writer = csv.writer(args.ess_out)
+writer.writerow(["statistic", "ess"])
+for result in results:
+writer.writerow(result)
+args.logfile.close()
+args.ess_out.close()
+if __name__ == '__main__':
+main(get_args())

Mercurial > repos > bcclaywell > argo_navis

comparison bin/ess.py @ 0:d67268158946 draft