comparison kma_result_to_mlst.py @ 0:934f961a7189 draft default tip

"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/kma_result_to_mlst commit 62e7cd82cb9b209bf3f797ae288916e88bbe8bc6-dirty"
author dfornika
date Thu, 31 Oct 2019 14:12:43 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:934f961a7189
1 #!/usr/bin/env python
2
3 from __future__ import print_function
4
5 import argparse
6 import csv
7 import json
8 import os
9 import sys
10
11 from pprint import pprint
12
def parse_res_file(res_file_path):
    """Parse a tab-delimited KMA result overview file into per-locus records.

    The first line of the file is treated as a header and skipped. For every
    remaining row the ``template`` column is split into a locus ID and an
    allele ID at the LAST underscore, so locus names that themselves contain
    underscores (e.g. ``SAUR_arcC_3``) are parsed instead of raising.

    Args:
        res_file_path: Path of the result file to read.

    Returns:
        A dict mapping locus ID -> allele ID -> record dict. Each record
        holds ``locus_id`` and ``allele_id`` (stripped strings), ``score``,
        ``expected`` and ``template_length`` (ints), and
        ``template_identity``, ``template_coverage``, ``query_identity``,
        ``query_coverage``, ``depth``, ``q_value`` and ``p_value`` (floats).
        If the same locus/allele pair occurs more than once, the last row
        wins. An empty file yields an empty dict.

    Raises:
        ValueError: If a template name contains no underscore, or a numeric
            column cannot be converted.
    """
    LOCUS_ALLELE_DELIMITER = '_'

    res_fieldnames = [
        'template',
        'score',
        'expected',
        'template_length',
        'template_identity',
        'template_coverage',
        'query_identity',
        'query_coverage',
        'depth',
        'q_value',
        'p_value',
    ]
    # Columns grouped by the numeric type they are converted to.
    int_fields = ('score', 'expected', 'template_length')
    float_fields = (
        'template_identity',
        'template_coverage',
        'query_identity',
        'query_coverage',
        'depth',
        'q_value',
        'p_value',
    )

    loci = {}
    with open(res_file_path, 'r') as f:
        reader = csv.DictReader(f, fieldnames=res_fieldnames, dialect="excel-tab")
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            # Split on the last delimiter only: the allele ID is the final
            # component, everything before it is the locus name.
            locus, allele = [
                part.strip()
                for part in row['template'].rsplit(LOCUS_ALLELE_DELIMITER, 1)
            ]
            record = {'locus_id': locus, 'allele_id': allele}
            for field in int_fields:
                record[field] = int(row[field].strip())
            for field in float_fields:
                record[field] = float(row[field].strip())
            loci.setdefault(locus, {})[allele] = record

    return loci
69
def main(args):
    """Print the best-scoring allele of every locus as tab-separated text.

    Reads the result file named by ``args.res`` via ``parse_res_file``,
    writes a header line, then one line per locus describing the allele
    with the highest ``score`` (the first one encountered on ties).
    """
    header_columns = [
        "locus_id",
        "allele_id",
        "template_identity",
        "template_coverage",
        "depth",
    ]
    numeric_columns = ("template_identity", "template_coverage", "depth")

    # Parse before printing so a parse failure emits no partial output.
    loci = parse_res_file(args.res)
    print("\t".join(header_columns))

    for candidates in loci.values():
        # max() returns the first maximal record, matching a stable
        # descending sort followed by taking element 0.
        winner = max(candidates.values(), key=lambda record: record['score'])
        fields = [winner['locus_id'], winner['allele_id']]
        fields.extend(str(winner[column]) for column in numeric_columns)
        print("\t".join(fields))
92
if __name__ == '__main__':
    # Command-line entry point. --res is mandatory: main() hands it to
    # parse_res_file, which opens it unconditionally, so a missing value
    # would otherwise surface as a TypeError from open(None).
    parser = argparse.ArgumentParser()
    parser.add_argument("--res", dest="res", required=True,
                        help="KMA result overview file")
    args = parser.parse_args()
    main(args)