Mercurial > repos > bcclaywell > argo_navis
comparison bin/parse_pact_tree.py @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
| author | bcclaywell |
|---|---|
| date | Mon, 12 Oct 2015 17:43:33 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d67268158946 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import argparse | |
| 3 import csv | |
| 4 import re | |
| 5 | |
| 6 | |
# Matches a coordinate token such as "{1.5,-2.0}" and captures the two numbers.
# Raw string so the regex escapes (\{, \d, \.) are not treated as string escapes.
coord_re = re.compile(r"\{([-\d\.]+),([-\d\.]+)\}")


def parse_rules(handle):
    """Return a raw, literal translation of the out.rules results from PACT.

    ``handle`` is any iterable of text lines (typically an open file). Exactly
    six lines are consumed, in this order:

    1. tips        -- whitespace-separated integer node ids
    2. trunk_nodes -- whitespace-separated integer node ids
    3. connections -- ``a->b`` tokens (``get_row`` reads this as node -> parent)
    4. labels      -- ``a->b`` tokens
    5. coordinates -- ``a->{x,y}`` tokens, values parsed to ``(float, float)``
    6. tip_names   -- ``a->b`` tokens, values kept as strings

    For every map, keys are converted to ``int`` when possible and otherwise
    left as strings; connection and label values get the same treatment.

    Returns a dict with the keys ``tips``, ``trunk_nodes``, ``connections``,
    ``labels``, ``coordinates`` and ``tip_names``.

    Raises ``StopIteration`` if ``handle`` yields fewer than six lines and
    ``ValueError`` if a node id or coordinate token is malformed.
    """
    def get_nodes(line):
        return [int(x) for x in line.split()]

    def int_if_intable(a):
        # Only a failed conversion is expected here; anything else should propagate.
        try:
            return int(a)
        except ValueError:
            return a

    def get_map(line, imgfn=int_if_intable):
        # Each whitespace-separated token has the form "key->value".
        coll = (x.split('->') for x in line.split())
        return dict((int_if_intable(a), imgfn(b)) for a, b in coll)

    def parse_coordinate(text):
        m = coord_re.match(text)
        if m is None:
            raise ValueError("malformed coordinate: %r" % (text,))
        return (float(m.group(1)), float(m.group(2)))

    # The built-in next() works on Python 2.6+ and Python 3, unlike handle.next().
    lines = iter(handle)
    tips = get_nodes(next(lines))
    trunk_nodes = get_nodes(next(lines))
    connections = get_map(next(lines))
    labels = get_map(next(lines))
    coordinates = get_map(next(lines), parse_coordinate)
    tip_names = get_map(next(lines), str)
    return dict(tips=tips, trunk_nodes=trunk_nodes, connections=connections, labels=labels,
                coordinates=coordinates, tip_names=tip_names)
| 37 | |
| 38 | |
def get_row(parsed_tree, n_id):
    """Build the row (as seen in the final table) for the node ``n_id``.

    A node listed in ``parsed_tree['tips']`` is classed "tip" and named from
    ``tip_names``; any other node is classed "trunk" with an empty name. A node
    with no entry in ``connections`` is classed "root" and acts as its own
    parent, so its parent coordinates equal its own.
    """
    is_tip = n_id in parsed_tree['tips']
    node_name = parsed_tree['tip_names'][n_id] if is_tip else ""
    node_class = "tip" if is_tip else "trunk"

    try:
        parent = parsed_tree['connections'][n_id]
    except KeyError:
        # No recorded parent: treat the node as the root, pointing at itself.
        parent, node_class = n_id, "root"

    node_label = parsed_tree['labels'][n_id]
    node_x, node_y = parsed_tree['coordinates'][n_id]
    par_x, par_y = parsed_tree['coordinates'][parent]

    return {
        'id': n_id,
        'klass': node_class,
        'name': node_name,
        'parent_id': parent,
        'x': node_x,
        'y': node_y,
        'parent_x': par_x,
        'parent_y': par_y,
        'label': node_label,
    }
| 59 | |
| 60 | |
def parsed_to_table(parsed_tree):
    """Lazily yield one row dict per node that has an entry in ``coordinates``.

    Each row comes from ``get_row`` and carries the columns
    id, parent_id, label, klass, name, x, y, parent_x, parent_y.
    """
    coordinates = parsed_tree['coordinates']
    for node_id in coordinates:
        yield get_row(parsed_tree, node_id)
| 66 | |
| 67 | |
def get_args():
    """Parse the command line and return the argparse namespace.

    Two positionals are accepted: ``input``, opened for reading, and
    ``output``, opened for writing.
    """
    cli = argparse.ArgumentParser(
        prog="parse_pact_tree.py",
        description="""Utility for parsing the output of PACT into a form renderable by ggplot""")
    # Register the positionals in order: (destination name, file mode).
    for dest, mode in (('input', 'r'), ('output', 'w')):
        cli.add_argument(dest, type=argparse.FileType(mode))
    return cli.parse_args()
| 74 | |
| 75 | |
def main():
    """Get args, run the parser, and write the resulting table out as CSV.

    The output has a header row followed by one row per node, with the columns
    id, parent_id, label, klass, name, x, y, parent_x, parent_y.
    """
    args = get_args()
    try:
        data = parse_rules(args.input)

        writer = csv.DictWriter(
            args.output,
            fieldnames=["id", "parent_id", "label", "klass", "name", "x", "y", "parent_x", "parent_y"])
        writer.writeheader()
        writer.writerows(parsed_to_table(data))
    finally:
        # Close both handles even when parsing or writing fails, so that a
        # malformed input file does not leak the open files.
        args.input.close()
        args.output.close()
| 90 | |
| 91 | |
# Run the command-line entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()
| 94 | |
| 95 | |
| 96 |
