Mercurial > repos > iuc > hyphy_annotate
comparison scripts/hyphy_summary.py @ 6:3e4cfc55a7e9 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit ec5db8349483b2cd46f9da23abe6cefcf65bc714"
| author | iuc |
|---|---|
| date | Sat, 03 Jul 2021 08:33:14 +0000 |
| parents | 5c296943043c |
| children |
comparison
equal
deleted
inserted
replaced
| 5:5c296943043c | 6:3e4cfc55a7e9 |
|---|---|
| 105 def cfel_summary(self): | 105 def cfel_summary(self): |
| 106 self.cfel = self._load_json(self.arguments.cfel) | 106 self.cfel = self._load_json(self.arguments.cfel) |
| 107 if self.cfel is None: | 107 if self.cfel is None: |
| 108 return | 108 return |
| 109 node_tags = {} | 109 node_tags = {} |
| 110 _ = self._newick_parser(self.cfel['input']['trees']['0'], False, node_tags, self.cfel)['json'] | 110 _ = newick_parser(self.cfel['input']['trees']['0'], False, node_tags, self.cfel, self.arguments, self.labels)['json'] |
| 111 if self.summary_json is not None: | 111 if self.summary_json is not None: |
| 112 omegas = {} | 112 omegas = {} |
| 113 T = {} | 113 T = {} |
| 114 for k in [[k.split('*')[1], v[0][0]] for k, v in self.cfel['fits']['Global MG94xREV']['Rate Distributions'].items()]: | 114 for k in [[k.split('*')[1], v[0][0]] for k, v in self.cfel['fits']['Global MG94xREV']['Rate Distributions'].items()]: |
| 115 if k[0] != 'background': | 115 if k[0] != 'background': |
| 189 | 189 |
| 190 def def_value(): | 190 def def_value(): |
| 191 return defaultdict(int) | 191 return defaultdict(int) |
| 192 compressed_subs = {} | 192 compressed_subs = {} |
| 193 node_tags = {} | 193 node_tags = {} |
| 194 the_tree = self._newick_parser(self.slac['input']['trees']['0'], False, node_tags, self.slac)['json'] | 194 the_tree = newick_parser(self.slac['input']['trees']['0'], False, node_tags, self.slac, self.arguments, self.labels) |
| 195 root_node = None | 195 root_node = None |
| 196 if self.summary_json is not None: | 196 if self.summary_json is not None: |
| 197 for branch, info in self.slac['branch attributes']['0'].items(): | 197 for branch, info in self.slac['branch attributes']['0'].items(): |
| 198 if branch in node_tags: | 198 if branch in node_tags: |
| 199 node_tags[branch].append(info['Global MG94xREV']) | 199 node_tags[branch].append(info['Global MG94xREV']) |
| 208 counts_aa_site = {} | 208 counts_aa_site = {} |
| 209 gs = self._get_genomic_annotation(i) | 209 gs = self._get_genomic_annotation(i) |
| 210 if gs[0] >= 0: | 210 if gs[0] >= 0: |
| 211 self.labels[root_node] = self.slac['branch attributes']['0'][root_node]['codon'][0][i] | 211 self.labels[root_node] = self.slac['branch attributes']['0'][root_node]['codon'][0][i] |
| 212 try: | 212 try: |
| 213 self._traverse_tree_in_order(the_tree, self.slac['branch attributes']['0'], i, None, root_node) | 213 traverse_tree_in_order(the_tree, self.labels, self.slac['branch attributes']['0'], i, None, root_node) |
| 214 except Exception: | 214 except Exception: |
| 215 raise | 215 pass |
| 216 compressed_subs[gs[0]] = self.labels | 216 compressed_subs[gs[0]] = self.labels |
| 217 for k in set([k[0] for k in node_tags.values()]): | 217 for k in set([k[0] for k in node_tags.values()]): |
| 218 if len(k): | 218 if len(k): |
| 219 counts_codon_site[k] = defaultdict(int) | 219 counts_codon_site[k] = defaultdict(int) |
| 220 counts_aa_site[k] = defaultdict(int) | 220 counts_aa_site[k] = defaultdict(int) |
| 431 gene_name = 'Not mapped' | 431 gene_name = 'Not mapped' |
| 432 else: | 432 else: |
| 433 gene_name = 'N/A' | 433 gene_name = 'N/A' |
| 434 return (genomic_site_coord, gene_name, gene_site) | 434 return (genomic_site_coord, gene_name, gene_site) |
| 435 | 435 |
| 436 def _traverse_tree_in_order(self, node, slac_data, i, parent_tag, root): | |
| 437 node_tag = None | |
| 438 if node is None: | |
| 439 return | |
| 440 try: | |
| 441 nn = root if node['name'] == 'root' else node['name'] | |
| 442 except Exception: | |
| 443 raise | |
| 444 if nn in slac_data: | |
| 445 node_tag = slac_data[nn]['codon'][0][i] | |
| 446 if (parent_tag != node_tag): | |
| 447 self.labels[nn] = node_tag | |
| 448 self.labels[node['name']] = node_tag | |
| 449 if 'children' in node: | |
| 450 for c in node['children']: | |
| 451 if c is not None: | |
| 452 if 'name' in c: | |
| 453 self._traverse_tree_in_order(c, slac_data, i, node_tag, root) | |
| 454 | |
| 455 def _match_node_names(self, qry_node, ref_node, mapping): | |
| 456 if 'children' in qry_node and 'children' in ref_node: | |
| 457 mapping[ref_node['name']] = qry_node['name'] | |
| 458 if len(qry_node['children']) != len(ref_node['children']): | |
| 459 raise Exception('Internal topology mismatch') | |
| 460 for i, n in enumerate(ref_node['children']): | |
| 461 self._match_node_names(qry_node['children'][i], n, mapping) | |
| 462 elif 'children' in qry_node: | |
| 463 raise Exception('Topology mismatch') | |
| 464 elif 'children' in ref_node: | |
| 465 raise Exception('Topology mismatch') | |
| 466 else: | |
| 467 if qry_node['name'] != ref_node['name']: | |
| 468 raise Exception('Leaf name mismatch') | |
| 469 | |
| 470 def _get_incoming_labels(self): | 436 def _get_incoming_labels(self): |
| 471 json_data = self._load_json(self.arguments.labels) | 437 json_data = self._load_json(self.arguments.labels) |
| 472 self.incoming_labels = json_data | 438 self.incoming_labels = json_data |
| 473 | 439 |
| 474 def _newick_parser(self, nwk_str, bootstrap_values, track_tags, json_map): | 440 |
| 475 clade_stack = [] | 441 def traverse_tree_in_order(node, labels, slac_data, i, parent_tag, root): |
| 476 automaton_state = 0 | 442 node_tag = None |
| 477 current_node_name = '' | 443 if 'name' not in node: |
| 478 current_node_attribute = '' | 444 nn = root |
| 479 current_node_annotation = '' | 445 else: |
| 480 quote_delimiter = None | 446 nn = root if node["name"] == 'root' else node["name"] |
| 481 name_quotes = {"'": 1, '"': 1} | 447 if nn in slac_data: |
| 482 | 448 node_tag = slac_data[nn]["codon"][0][i] |
| 483 def add_new_tree_level(): | 449 if (parent_tag != node_tag): |
| 484 new_level = {'name': None} | 450 labels[nn] = node_tag |
| 485 the_parent = clade_stack[len(clade_stack) - 1] | 451 labels[node["name"]] = node_tag |
| 486 if ('children' not in the_parent): | 452 if "children" in node: |
| 487 the_parent['children'] = [] | 453 for c in node["children"]: |
| 488 clade_stack.append(new_level) | 454 traverse_tree_in_order(c, labels, slac_data, i, node_tag, root) |
| 489 the_parent['children'].append(clade_stack[len(clade_stack) - 1]) | 455 |
| 490 clade_stack[len(clade_stack) - 1]['original_child_order'] = len(the_parent['children']) | 456 |
| 491 | 457 def newick_parser(nwk_str, bootstrap_values, track_tags, json_map, import_settings, tags): |
| 492 def finish_node_definition(): | 458 clade_stack = [] |
| 493 nonlocal current_node_name | 459 automaton_state = 0 |
| 494 nonlocal current_node_annotation | 460 current_node_name = "" |
| 495 nonlocal current_node_attribute | 461 current_node_attribute = "" |
| 496 this_node = clade_stack.pop() | 462 current_node_annotation = "" |
| 497 if (bootstrap_values and 'children' in this_node): | 463 quote_delimiter = None |
| 498 this_node['bootstrap_values'] = current_node_name | 464 name_quotes = { |
| 465 "'": 1, | |
| 466 '"': 1 | |
| 467 } | |
| 468 | |
| 469 def add_new_tree_level(): | |
| 470 new_level = { | |
| 471 "name": None | |
| 472 } | |
| 473 the_parent = clade_stack[len(clade_stack) - 1] | |
| 474 if "children" not in the_parent: | |
| 475 the_parent["children"] = [] | |
| 476 | |
| 477 clade_stack.append(new_level) | |
| 478 the_parent["children"].append(clade_stack[len(clade_stack) - 1]) | |
| 479 clade_stack[len(clade_stack) - 1]["original_child_order"] = len(the_parent["children"]) | |
| 480 | |
| 481 def finish_node_definition(): | |
| 482 nonlocal current_node_name | |
| 483 nonlocal current_node_annotation | |
| 484 nonlocal current_node_attribute | |
| 485 | |
| 486 this_node = clade_stack.pop() | |
| 487 if (bootstrap_values and "children" in this_node): | |
| 488 this_node["bootstrap_values"] = current_node_name | |
| 489 else: | |
| 490 this_node["name"] = current_node_name | |
| 491 | |
| 492 this_node["attribute"] = current_node_attribute | |
| 493 this_node["annotation"] = current_node_annotation | |
| 494 | |
| 495 try: | |
| 496 | |
| 497 if 'children' not in this_node: | |
| 498 node_tag = import_settings.default_tag | |
| 499 if json_map: | |
| 500 tn = json_map["branch attributes"]["0"][this_node["name"]] | |
| 501 else: | |
| 502 tn = this_node | |
| 503 nn = tn["original name"] if "original name" in tn else tn["name"] | |
| 504 for k, v in tags.items(): | |
| 505 if nn.find(k) >= 0: | |
| 506 node_tag = v | |
| 507 break | |
| 499 else: | 508 else: |
| 500 this_node['name'] = current_node_name | 509 counts = {} |
| 501 this_node['attribute'] = current_node_attribute | 510 node_tag = "" |
| 502 this_node['annotation'] = current_node_annotation | 511 for n in this_node['children']: |
| 503 try: | 512 counts[n["tag"]] = 1 + (counts[n["tag"]] if n["tag"] in counts else 0) |
| 504 if 'children' not in this_node: | 513 if len(counts) == 1: |
| 505 node_tag = self.arguments.default_tag | 514 node_tag = list(counts.keys())[0] |
| 506 if json_map: | 515 |
| 507 tn = json_map['branch attributes']['0'][this_node['name']] | 516 this_node["tag"] = node_tag |
| 517 except Exception as e: | |
| 518 pass | |
| 519 print(e) | |
| 520 | |
| 521 if track_tags is not None: | |
| 522 track_tags[this_node["name"]] = [this_node["tag"], 'children' in this_node] | |
| 523 | |
| 524 current_node_name = "" | |
| 525 current_node_attribute = "" | |
| 526 current_node_annotation = "" | |
| 527 | |
| 528 def generate_error(location): | |
| 529 return { | |
| 530 'json': None, | |
| 531 'error': | |
| 532 "Unexpected '%s' in '%s[ERROR HERE]%s'" % (nwk_str[location], nwk_str[location - 20:location + 1], nwk_str[location + 1:location + 20]) | |
| 533 } | |
| 534 | |
| 535 tree_json = { | |
| 536 "name": "root" | |
| 537 } | |
| 538 | |
| 539 clade_stack.append(tree_json) | |
| 540 | |
| 541 space = re.compile(r"\s") | |
| 542 | |
| 543 for char_index in range(len(nwk_str)): | |
| 544 try: | |
| 545 current_char = nwk_str[char_index] | |
| 546 if automaton_state == 0: | |
| 547 # look for the first opening parenthesis | |
| 548 if (current_char == "("): | |
| 549 add_new_tree_level() | |
| 550 automaton_state = 1 | |
| 551 elif automaton_state == 1 or automaton_state == 3: | |
| 552 # case 1: // name | |
| 553 # case 3: { // branch length | |
| 554 # reading name | |
| 555 if (current_char == ":"): | |
| 556 automaton_state = 3 | |
| 557 elif current_char == "," or current_char == ")": | |
| 558 try: | |
| 559 finish_node_definition() | |
| 560 automaton_state = 1 | |
| 561 if (current_char == ","): | |
| 562 add_new_tree_level() | |
| 563 except Exception as e: | |
| 564 return generate_error(char_index) | |
| 565 print(e) | |
| 566 | |
| 567 elif (current_char == "("): | |
| 568 if len(current_node_name) > 0: | |
| 569 return generate_error(char_index) | |
| 508 else: | 570 else: |
| 509 tn = this_node | |
| 510 nn = tn['original name'] if 'original name' in tn else tn['name'] | |
| 511 for k, v in self.incoming_labels.items(): | |
| 512 if nn.find(k) >= 0: | |
| 513 node_tag = v | |
| 514 break | |
| 515 else: | |
| 516 counts = {} | |
| 517 node_tag = '' | |
| 518 for n in this_node['children']: | |
| 519 counts[n['tag']] = 1 + (counts[n['tag']] if n['tag'] in counts else 0) | |
| 520 if len(counts) == 1: | |
| 521 node_tag = list(counts.keys())[0] | |
| 522 this_node['tag'] = node_tag | |
| 523 except Exception: | |
| 524 raise | |
| 525 if track_tags is not None: | |
| 526 track_tags[this_node['name']] = [this_node['tag'], 'children' in this_node] | |
| 527 current_node_name = '' | |
| 528 current_node_attribute = '' | |
| 529 current_node_annotation = '' | |
| 530 | |
| 531 def generate_error(location): | |
| 532 unexpected = nwk_str[location] | |
| 533 before = nwk_str[location - 20:location + 1] | |
| 534 after = nwk_str[location + 1:location + 20] | |
| 535 return { | |
| 536 'json': None, | |
| 537 'error': 'Unexpected %s in %s [ERROR HERE] %s' % (unexpected, before, after) | |
| 538 } | |
| 539 tree_json = {'name': 'root'} | |
| 540 clade_stack.append(tree_json) | |
| 541 space = re.compile(r'\s') | |
| 542 for char_index in range(len(nwk_str)): | |
| 543 try: | |
| 544 current_char = nwk_str[char_index] | |
| 545 if automaton_state == 0: | |
| 546 # look for the first opening parenthesis | |
| 547 if (current_char == '('): | |
| 548 add_new_tree_level() | 571 add_new_tree_level() |
| 549 automaton_state = 1 | 572 |
| 550 elif automaton_state == 1 or automaton_state == 3: | 573 elif (current_char in name_quotes): |
| 551 # case 1: // name | 574 if automaton_state == 1 and len(current_node_name) == 0 and len(current_node_attribute) == 0 and len(current_node_annotation) == 0: |
| 552 # case 3: { // branch length | 575 automaton_state = 2 |
| 553 # reading name | 576 quote_delimiter = current_char |
| 554 if (current_char == ':'): | 577 continue |
| 555 automaton_state = 3 | 578 return generate_error(char_index) |
| 556 elif current_char == ',' or current_char == ')': | 579 else: |
| 557 try: | 580 if (current_char == "["): |
| 558 finish_node_definition() | 581 if len(current_node_annotation): |
| 559 automaton_state = 1 | |
| 560 if (current_char == ','): | |
| 561 add_new_tree_level() | |
| 562 except Exception: | |
| 563 return generate_error(char_index) | |
| 564 elif (current_char == '('): | |
| 565 if len(current_node_name) > 0: | |
| 566 return generate_error(char_index) | 582 return generate_error(char_index) |
| 567 else: | 583 else: |
| 568 add_new_tree_level() | 584 automaton_state = 4 |
| 569 elif (current_char in name_quotes): | 585 else: |
| 570 if automaton_state == 1 and len(current_node_name) == 0 and len(current_node_attribute) == 0 and len(current_node_annotation) == 0: | 586 if (automaton_state == 3): |
| 571 automaton_state = 2 | 587 current_node_attribute += current_char |
| 572 quote_delimiter = current_char | 588 else: |
| 589 if (space.search(current_char)): | |
| 590 continue | |
| 591 if (current_char == ""): | |
| 592 char_index = len(nwk_str) | |
| 593 break | |
| 594 current_node_name += current_char | |
| 595 elif automaton_state == 2: | |
| 596 # inside a quoted expression | |
| 597 if (current_char == quote_delimiter): | |
| 598 if (char_index < len(nwk_str - 1)): | |
| 599 if (nwk_str[char_index + 1] == quote_delimiter): | |
| 600 char_index += 1 | |
| 601 current_node_name += quote_delimiter | |
| 573 continue | 602 continue |
| 603 | |
| 604 quote_delimiter = 0 | |
| 605 automaton_state = 1 | |
| 606 continue | |
| 607 else: | |
| 608 current_node_name += current_char | |
| 609 elif automaton_state == 4: | |
| 610 # inside a comment / attribute | |
| 611 if (current_char == "]"): | |
| 612 automaton_state = 3 | |
| 613 else: | |
| 614 if (current_char == "["): | |
| 574 return generate_error(char_index) | 615 return generate_error(char_index) |
| 575 else: | 616 current_node_annotation += current_char |
| 576 if (current_char == '['): | 617 except Exception as e: |
| 577 if len(current_node_annotation): | 618 return generate_error(char_index) |
| 578 return generate_error(char_index) | 619 print(e) |
| 579 else: | 620 |
| 580 automaton_state = 4 | 621 if (len(clade_stack) != 1): |
| 581 else: | 622 return generate_error(len(nwk_str) - 1) |
| 582 if (automaton_state == 3): | 623 |
| 583 current_node_attribute += current_char | 624 if (len(current_node_name)): |
| 584 else: | 625 tree_json['name'] = current_node_name |
| 585 if (space.search(current_char)): | 626 |
| 586 continue | 627 return { |
| 587 if (current_char == ';'): | 628 'json': tree_json, |
| 588 char_index = len(nwk_str) | 629 'error': None |
| 589 break | 630 } |
| 590 current_node_name += current_char | |
| 591 elif automaton_state == 2: | |
| 592 # inside a quoted expression | |
| 593 if (current_char == quote_delimiter): | |
| 594 if (char_index < len(nwk_str - 1)): | |
| 595 if (nwk_str[char_index + 1] == quote_delimiter): | |
| 596 char_index += 1 | |
| 597 current_node_name += quote_delimiter | |
| 598 continue | |
| 599 quote_delimiter = 0 | |
| 600 automaton_state = 1 | |
| 601 continue | |
| 602 else: | |
| 603 current_node_name += current_char | |
| 604 elif automaton_state == 4: | |
| 605 # inside a comment / attribute | |
| 606 if (current_char == ']'): | |
| 607 automaton_state = 3 | |
| 608 else: | |
| 609 if (current_char == '['): | |
| 610 return generate_error(char_index) | |
| 611 current_node_annotation += current_char | |
| 612 except Exception: | |
| 613 return generate_error(char_index) | |
| 614 | |
| 615 if (len(clade_stack) != 1): | |
| 616 return generate_error(len(nwk_str) - 1) | |
| 617 | |
| 618 if (len(current_node_name)): | |
| 619 tree_json['name'] = current_node_name | |
| 620 | |
| 621 return { | |
| 622 'json': tree_json, | |
| 623 'error': None | |
| 624 } | |
| 625 | 631 |
| 626 | 632 |
| 627 if __name__ == '__main__': | 633 if __name__ == '__main__': |
| 628 parser = argparse.ArgumentParser(description='Summarize selection analysis results.') | 634 parser = argparse.ArgumentParser(description='Summarize selection analysis results.') |
| 629 parser.add_argument('--combined', help='Combined reference and query alignment from TN-93', required=False, type=str) | 635 parser.add_argument('--combined', help='Combined reference and query alignment from TN-93', required=False, type=str) |
