Mercurial > repos > iuc > get_hrun
diff get_hrun.py @ 0:ccb3b00e6405 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
| author | iuc |
|---|---|
| date | Tue, 02 Mar 2021 21:35:15 +0000 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/get_hrun.py Tue Mar 02 21:35:15 2021 +0000 @@ -0,0 +1,64 @@ +#!/usr/bin/env python +import argparse + +import vcf +from pyfaidx import Fasta +from vcf.parser import _Info as VcfInfo + + +parser = argparse.ArgumentParser(description='Generate report tables') +parser.add_argument("--reference", + required=True, + help="Filepath to reference FASTA file") +parser.add_argument("--in-vcf", + required=True, + help="Filepath to vcf file to be analyzed") +parser.add_argument("--out-vcf", + required=True, + help="Filepath to vcf file to be output") + +args = parser.parse_args() +ref_path = args.reference +reference = Fasta(ref_path, sequence_always_upper=True, read_ahead=1000) +in_vcf_path = args.in_vcf +in_vcf_handle = open(in_vcf_path) +in_vcf = vcf.Reader(in_vcf_handle) +in_vcf.infos['HRUN'] = VcfInfo( + 'HRUN', 1, 'Integer', + 'Homopolymer length to the right of report indel position', + "get_hrun", "1.0") +out_vcf_path = args.out_vcf +out_vcf_handle = open(out_vcf_path, 'w') +out_vcf = vcf.Writer(out_vcf_handle, in_vcf) +for record in in_vcf: + chrom = record.CHROM + pos = record.POS - 1 + ref = record.REF + calc_hrun = False + for alt in record.ALT: + if len(ref) != len(alt): + calc_hrun = True + if calc_hrun: + window = 50 + hrun = 1 + start = pos + 2 + end = start + window + base = reference[chrom][pos + 1] + seq_len = len(reference[chrom]) + for i in range(start, len(reference)): + base2 = reference[chrom][i] + if base == base2: + hrun += 1 + else: + break + # Extend to left in case not left aligned + for i in range(pos, -1, -1): + if reference[chrom][i] == base: + hrun += 1 + else: + break + record.add_info('HRUN', [hrun]) + out_vcf.write_record(record) +in_vcf_handle.close() +out_vcf.close() +out_vcf_handle.close()
