Mercurial > repos > dfornika > mob_suite
comparison distance_matrix_phylip.py @ 38:17a60dd45b31 draft
"planemo upload for repository https://github.com/phac-nml/mob-suite commit 608abbed8881523f97c0378e350f32243a754237-dirty"
author | dfornika |
---|---|
date | Wed, 30 Oct 2019 23:38:12 -0400 |
parents | 115b462224cf |
children |
comparison
equal
deleted
inserted
replaced
37:7124bff199fa | 38:17a60dd45b31 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import sys | |
5 import csv | |
6 import numpy as np | |
7 | |
8 from Bio.Phylo.TreeConstruction import DistanceMatrix, DistanceTreeConstructor | |
9 | |
10 | |
11 def process_input_matrix(input_matrix): | |
12 """ Converts an array-of-arrays containting sample IDs and distances | |
13 into a BioPython DistanceMatrix object | |
14 """ | |
15 input_matrix.pop(0) | |
16 sample_names = [row[0] for row in input_matrix] | |
17 for row in input_matrix: | |
18 row.pop(0) | |
19 distance_matrix = [] | |
20 for input_matrix_row in input_matrix: | |
21 distance_matrix.append([float(i) for i in input_matrix_row]) | |
22 """ np.tril() converts a matrix like this: [[0 1 2] | |
23 [1 0 1] | |
24 [2 1 0]] | |
25 ...into this: [[0 0 0] | |
26 [1 0 0] | |
27 [2 1 0]] | |
28 ...but what we need to pass to DistanceMatrix() is this: [[0] | |
29 [1 0] | |
30 [2 1 0]] | |
31 ...so that's what the (somewhat cryptic) code below does. | |
32 """ | |
33 distance_matrix = np.tril(np.array(distance_matrix)) | |
34 num_rows = distance_matrix.shape[0] | |
35 """ masking the distance matrix with tril_indices gives a linearized | |
36 distance matrix [0 1 0 2 1 0] that we need to re-construct | |
37 into [[0], [1, 0], [2, 1, 0]] | |
38 """ | |
39 lower_triangular_idx_mask = np.tril_indices(num_rows) | |
40 linear_distance_matrix = distance_matrix[lower_triangular_idx_mask] | |
41 distance_matrix = [] | |
42 min = 0 | |
43 max = 1 | |
44 for i in range(num_rows): | |
45 distance_matrix.append(linear_distance_matrix[min:max].tolist()) | |
46 min = max | |
47 max = max + (i + 2) | |
48 | |
49 distance_matrix = DistanceMatrix(names=sample_names, matrix=distance_matrix) | |
50 | |
51 return distance_matrix | |
52 | |
53 def main(): | |
54 parser = argparse.ArgumentParser() | |
55 parser.add_argument("--input", dest="input", help="") | |
56 args = parser.parse_args() | |
57 | |
58 reader = csv.reader(open(args.input, "r"), delimiter="\t") | |
59 input_matrix = list(reader) | |
60 # Don't build a tree with fewer than 3 samples, just produce an empty file | |
61 if len(input_matrix) < 4: | |
62 print('();') | |
63 sys.exit(0) | |
64 distance_matrix = process_input_matrix(input_matrix) | |
65 distance_matrix.format_phylip(sys.stdout) | |
66 | |
67 | |
68 if __name__ == '__main__': | |
69 main() |