Mercurial > repos > dfornika > kma_result_to_mlst
comparison kma_result_to_mlst.py @ 0:934f961a7189 draft default tip
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/kma_result_to_mlst commit 62e7cd82cb9b209bf3f797ae288916e88bbe8bc6-dirty"
author | dfornika |
---|---|
date | Thu, 31 Oct 2019 14:12:43 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:934f961a7189 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 from __future__ import print_function | |
4 | |
5 import argparse | |
6 import csv | |
7 import json | |
8 import os | |
9 import sys | |
10 | |
11 from pprint import pprint | |
12 | |
def parse_res_file(res_file_path):
    """Parse a KMA result overview file into per-locus allele records.

    The file is read as tab-separated text whose first line is a header
    (skipped). The first column of each row is a template name of the form
    ``<locus>_<allele>``; it is split on the LAST underscore so that locus
    names which themselves contain underscores are kept intact.

    Args:
        res_file_path: Path to the tab-separated result file.

    Returns:
        A dict mapping locus id -> dict mapping allele id -> record, where
        each record is a dict with the keys ``locus_id``, ``allele_id``,
        ``score``, ``expected``, ``template_length`` (ints) and
        ``template_identity``, ``template_coverage``, ``query_identity``,
        ``query_coverage``, ``depth``, ``q_value``, ``p_value`` (floats).
        An empty file (no header at all) yields an empty dict.

    Raises:
        ValueError: If a template name contains no underscore, or a numeric
            column cannot be converted.
    """
    LOCUS_ALLELE_DELIMITER = '_'

    res_fieldnames = [
        'template',
        'score',
        'expected',
        'template_length',
        'template_identity',
        'template_coverage',
        'query_identity',
        'query_coverage',
        'depth',
        'q_value',
        'p_value',
    ]
    int_fields = ('score', 'expected', 'template_length')
    float_fields = (
        'template_identity',
        'template_coverage',
        'query_identity',
        'query_coverage',
        'depth',
        'q_value',
        'p_value',
    )

    with open(res_file_path, 'r') as f:
        loci = {}
        reader = csv.DictReader(f, fieldnames=res_fieldnames, dialect="excel-tab")
        # Skip the header line; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            template = row['template']
            # Split on the last delimiter only: the allele id is the final
            # component, everything before it is the locus name.
            locus, found, allele = template.rpartition(LOCUS_ALLELE_DELIMITER)
            if not found:
                raise ValueError(
                    "template name {!r} does not contain the locus/allele "
                    "delimiter {!r}".format(template, LOCUS_ALLELE_DELIMITER)
                )
            locus = locus.strip()
            allele = allele.strip()

            record = {'locus_id': locus, 'allele_id': allele}
            for name in int_fields:
                record[name] = int(row[name].strip())
            for name in float_fields:
                record[name] = float(row[name].strip())

            # Create the per-locus table on first sight, then store the allele.
            loci.setdefault(locus, {})[allele] = record

    return loci
69 | |
def main(args):
    """Print the highest-scoring allele of every locus as a tab-separated table.

    The file named by ``args.res`` is parsed with ``parse_res_file``. A header
    line is written to stdout, followed by one line per locus giving the
    locus id, allele id, template identity, template coverage and depth of
    the allele with the greatest ``score``.

    Args:
        args: Object with a ``res`` attribute holding the result file path.
    """
    numeric_columns = ("template_identity", "template_coverage", "depth")

    # Parse first, so nothing is printed if the input cannot be read.
    per_locus = parse_res_file(args.res)

    header = ["locus_id", "allele_id"] + list(numeric_columns)
    print("\t".join(header))

    for allele_table in per_locus.values():
        # Stable descending sort by score; the first entry is the best hit.
        ranked = list(allele_table.values())
        ranked.sort(key=lambda record: record['score'], reverse=True)
        winner = allele_table[ranked[0]['allele_id']]

        fields = [winner['locus_id'], winner['allele_id']]
        fields.extend(str(winner[column]) for column in numeric_columns)
        print("\t".join(fields))
92 | |
if __name__ == '__main__':
    # Command-line entry point: read the result file path from --res and
    # print the best allele per locus to stdout.
    parser = argparse.ArgumentParser()
    # --res is mandatory: without it args.res would be None and open() would
    # fail with a traceback instead of a usage message.
    parser.add_argument("--res", dest="res", required=True, help="KMA result overview file")
    args = parser.parse_args()
    main(args)