Mercurial > repos > dfornika > mentalist
view tools/mentalist_tree/mentalist_tree @ 63:c9d05c980d2a draft
planemo upload for repository https://github.com/WGS-TB/MentaLiST/tree/master/galaxy commit a58fc8147c7cfe652f310f1e8b50c162fd909c9a-dirty
author | dfornika |
---|---|
date | Wed, 13 Jun 2018 19:02:38 -0400 |
parents | 931bd8ac187d |
children | 79dd7b62e4a8 |
line wrap: on
line source
#!/usr/bin/env python
"""Build a neighbor-joining tree from a tab-separated distance matrix.

The input file is read with a tab delimiter; its first row is treated as a
header and the first column of every following row as the sample name. The
resulting tree is written to stdout in Newick format.
"""

import sys
import csv

import numpy as np
import Bio.Phylo
from Bio.Phylo.TreeConstruction import DistanceMatrix, DistanceTreeConstructor


def usage():
    """Print the command-line usage message to stdout."""
    print("usage: mentalist_tree <input.tsv>\n")


def process_input_matrix(input_matrix):
    """
    Convert an array-of-arrays containing sample IDs and distances
    into a BioPython DistanceMatrix object.

    input_matrix is a list of rows (lists of strings). The first row is a
    header and is ignored; in every other row the first element is the
    sample name and the remaining elements are integer distances. Empty
    rows (e.g. produced by a trailing blank line) are skipped.

    The argument is not modified.

    Returns a DistanceMatrix built from the lower triangle (diagonal
    included) of the parsed distances: row i holds i + 1 values.

    Raises ValueError if a distance cannot be parsed as an integer.
    """
    # Work on a filtered view instead of popping from the caller's lists.
    data_rows = [row for row in input_matrix[1:] if row]
    sample_names = [row[0] for row in data_rows]

    full_matrix = np.array(
        [[int(value) for value in row[1:]] for row in data_rows]
    )

    # Row i of the lower triangle is the first i + 1 entries of row i;
    # tolist() converts numpy integers back to plain Python ints.
    lower_triangle = [
        full_matrix[i, :i + 1].tolist() for i in range(len(data_rows))
    ]

    return DistanceMatrix(names=sample_names, matrix=lower_triangle)


def main():
    """Read the matrix named on the command line and print its NJ tree.

    Exits with status 1 after printing the usage message when no input
    file argument is given.
    """
    if len(sys.argv) < 2:
        usage()
        sys.exit(1)

    input_file = sys.argv[1]

    # Read everything up front so the file handle is closed promptly.
    with open(input_file, "r") as handle:
        input_matrix = list(csv.reader(handle, delimiter="\t"))

    distance_matrix = process_input_matrix(input_matrix)
    constructor = DistanceTreeConstructor()
    tree = constructor.nj(distance_matrix)
    Bio.Phylo.write(tree, sys.stdout, 'newick')


if __name__ == '__main__':
    main()