Mercurial > repos > dfornika > kma_result_to_mlst
comparison kma_result_to_mlst.py @ 0:934f961a7189 draft default tip
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/kma_result_to_mlst commit 62e7cd82cb9b209bf3f797ae288916e88bbe8bc6-dirty"
| author | dfornika |
|---|---|
| date | Thu, 31 Oct 2019 14:12:43 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:934f961a7189 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 from __future__ import print_function | |
| 4 | |
| 5 import argparse | |
| 6 import csv | |
| 7 import json | |
| 8 import os | |
| 9 import sys | |
| 10 | |
| 11 from pprint import pprint | |
| 12 | |
def parse_res_file(res_file_path):
    """Parse a KMA result overview file into per-locus allele records.

    The file is read as tab-separated text. The first line is treated as a
    header and discarded; every following row is mapped positionally onto
    the eleven column names listed in ``res_fieldnames``.

    The ``template`` column is expected to look like ``<locus>_<allele>``.
    It is split on the LAST underscore so that locus names which themselves
    contain underscores (e.g. ``Oxf_gltA_7``) are handled correctly.

    Args:
        res_file_path: Path to the result overview file.

    Returns:
        A dict mapping locus id -> dict mapping allele id -> record, where
        each record is a dict with the keys ``locus_id``, ``allele_id``,
        ``score``, ``expected``, ``template_length`` (ints) and
        ``template_identity``, ``template_coverage``, ``query_identity``,
        ``query_coverage``, ``depth``, ``q_value``, ``p_value`` (floats).
        An empty file yields an empty dict.

    Raises:
        ValueError: If a template name contains no underscore, or a numeric
            column cannot be converted.
    """
    LOCUS_ALLELE_DELIMITER = '_'

    res_fieldnames = [
        'template',
        'score',
        'expected',
        'template_length',
        'template_identity',
        'template_coverage',
        'query_identity',
        'query_coverage',
        'depth',
        'q_value',
        'p_value',
    ]
    int_fields = ('score', 'expected', 'template_length')
    float_fields = (
        'template_identity',
        'template_coverage',
        'query_identity',
        'query_coverage',
        'depth',
        'q_value',
        'p_value',
    )

    loci = {}
    with open(res_file_path, 'r') as f:
        reader = csv.DictReader(f, fieldnames=res_fieldnames, dialect="excel-tab")
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            # Split on the last delimiter only: everything before it is the
            # locus name, everything after it is the allele id.
            locus, allele = [
                part.strip()
                for part in row['template'].rsplit(LOCUS_ALLELE_DELIMITER, 1)
            ]
            record = {'locus_id': locus, 'allele_id': allele}
            for field in int_fields:
                record[field] = int(row[field].strip())
            for field in float_fields:
                record[field] = float(row[field].strip())
            loci.setdefault(locus, {})[allele] = record

    return loci
| 69 | |
def main(args):
    """Print the highest-scoring allele of every locus as a TSV table.

    Reads the result file named by ``args.res`` via ``parse_res_file`` and
    writes to standard output a header line followed by one line per locus
    holding the locus id, the id of the allele with the highest ``score``,
    and that allele's template identity, template coverage and depth.

    Args:
        args: Parsed command-line arguments; only ``args.res`` (path to the
            result file) is read.
    """
    output_fields = [
        "locus_id",
        "allele_id",
        "template_identity",
        "template_coverage",
        "depth",
    ]

    loci = parse_res_file(args.res)
    print("\t".join(output_fields))

    for alleles in loci.values():
        # max() returns the first record with the top score, which is the
        # same record a stable descending sort would place at index 0.
        best = max(alleles.values(), key=lambda record: record['score'])
        print("\t".join(str(best[field]) for field in output_fields))
| 92 | |
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and hand them to main().
    parser = argparse.ArgumentParser()
    # --res is mandatory: without it main() would try to open None and fail
    # with an unhelpful traceback, so let argparse report the usage error.
    parser.add_argument("--res", dest="res", required=True,
                        help="KMA result overview file")
    args = parser.parse_args()
    main(args)
