Mercurial > repos > dfornika > ivar_variants_to_vcf
comparison ivar_variants_to_vcf.py @ 0:c87f6ad32fd8 draft default tip
"planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/ivar_variants_to_vcf commit 16332019b4aab6af58c74e631f390dfeef23a3dc"
author | dfornika |
---|---|
date | Fri, 05 Jun 2020 05:10:05 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c87f6ad32fd8 |
---|---|
1 #!/usr/bin/env python | |
2 import os | |
3 import sys | |
4 import re | |
5 import errno | |
6 import argparse | |
7 | |
def parse_args(args=None):
    """Build the command-line parser and parse *args*.

    Positional arguments are FILE_IN (input tsv) and FILE_OUT (output vcf).
    Optional flags are ``-po/--pass_only`` (stored as PASS_ONLY) and
    ``-ma/--min_allele_freq`` (stored as MIN_ALLELE_FREQ, float, default 0).

    :param args: list of argument strings; ``None`` makes argparse read
        ``sys.argv[1:]``.
    :return: the populated ``argparse.Namespace``.
    """
    cli = argparse.ArgumentParser(
        description='Convert iVar variants tsv file to vcf format.',
        epilog="""Example usage: python ivar_variants_to_vcf.py <FILE_IN> <FILE_OUT>""",
    )

    # Required positionals.
    cli.add_argument('FILE_IN', help="Input tsv file.")
    cli.add_argument('FILE_OUT', help="Full path to output vcf file.")

    # Optional output filters.
    cli.add_argument(
        '-po', '--pass_only',
        dest="PASS_ONLY",
        action='store_true',
        help="Only output variants that PASS all filters.",
    )
    cli.add_argument(
        '-ma', '--min_allele_freq',
        dest="MIN_ALLELE_FREQ",
        type=float,
        default=0,
        help="Only output variants where allele frequency greater than this number (default: 0).",
    )

    return cli.parse_args(args)
19 | |
def make_dir(path):
    """Create directory *path* (and any missing parents) if it does not exist.

    An empty *path* is a no-op, which lets callers pass the result of
    ``os.path.dirname()`` on a bare filename.

    :param path: directory path to create.
    :raises OSError: if the directory cannot be created, including when
        *path* already exists but is not a directory.
    """
    if not path:
        return
    try:
        os.makedirs(path)
    except OSError as exception:
        # EEXIST is only harmless when the existing entry really is a
        # directory; a regular file at that path must still be reported.
        if exception.errno != errno.EEXIST or not os.path.isdir(path):
            raise
27 | |
def ivar_variants_to_vcf(FileIn,FileOut,passOnly=False,minAF=0):
    """Convert an iVar variants tsv file to VCF and print per-type variant counts.

    Each data row is split on tabs and read by position: column 0 is CHROM,
    1 is POS, 2 is REF, 3 is ALT, columns 4-10 fill the sample FORMAT fields
    (REF_DP, REF_RV, REF_QUAL, ALT_DP, ALT_RV, ALT_QUAL, ALT_FREQ), column 11
    is written as INFO ``DP`` and column 13 is the pass flag (``TRUE`` maps to
    FILTER ``PASS``, anything else to ``FAIL``). Rows starting with ``REGION``
    (the column header) and blank rows are skipped.

    An ALT starting with ``+`` is an insertion (ALT becomes REF + inserted
    bases); an ALT starting with ``-`` is a deletion (REF becomes REF +
    deleted bases and ALT becomes the original REF).

    After writing the VCF, a two-line tab-separated table of DEL/INS/SNP
    counts for the written records is printed to stdout.

    :param FileIn: path to the input tsv file.
    :param FileOut: path to the output vcf file; its directory is created
        if needed.
    :param passOnly: when true, only rows whose FILTER is ``PASS`` are written.
    :param minAF: rows whose column 10 value (as float) is below this are
        not written.
    :raises IndexError: if a data row has fewer than 14 tab-separated columns.
    :raises ValueError: if column 10 of a data row is not a number.
    """
    filename = os.path.splitext(FileIn)[0]
    header = ('##fileformat=VCFv4.2\n'
              '##source=iVar\n'
              '##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">\n'
              '##FILTER=<ID=PASS,Description="Result of p-value <= 0.05">\n'
              '##FILTER=<ID=FAIL,Description="Result of p-value > 0.05">\n'
              '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n'
              '##FORMAT=<ID=REF_DP,Number=1,Type=Integer,Description="Depth of reference base">\n'
              '##FORMAT=<ID=REF_RV,Number=1,Type=Integer,Description="Depth of reference base on reverse reads">\n'
              '##FORMAT=<ID=REF_QUAL,Number=1,Type=Integer,Description="Mean quality of reference base">\n'
              '##FORMAT=<ID=ALT_DP,Number=1,Type=Integer,Description="Depth of alternate base">\n'
              '##FORMAT=<ID=ALT_RV,Number=1,Type=Integer,Description="Depth of alternate base on reverse reads">\n'
              '##FORMAT=<ID=ALT_QUAL,Number=1,Type=String,Description="Mean quality of alternate base">\n'
              '##FORMAT=<ID=ALT_FREQ,Number=1,Type=String,Description="Frequency of alternate base">\n')
    header += '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t'+filename+'\n'

    # A set gives O(1) duplicate checks; only membership is ever needed.
    seenVars = set()
    varCountDict = {'SNP':0, 'INS':0, 'DEL':0}
    OutDir = os.path.dirname(FileOut)
    if OutDir:
        # A bare output filename has no directory component to create.
        make_dir(OutDir)

    # Context managers guarantee both files are closed even if a row fails to parse.
    with open(FileOut,'w') as fout:
        fout.write(header)
        with open(FileIn) as f:
            for line in f:
                # Strip the line terminator first: otherwise the last column
                # keeps its newline and a trailing pass flag never equals 'TRUE'.
                line = line.rstrip('\r\n')
                if not line.strip() or line.startswith('REGION'):
                    continue
                fields = line.split('\t')
                CHROM = fields[0]
                POS = fields[1]
                ID = '.'
                REF = fields[2]
                ALT = fields[3]
                var_type = 'SNP'
                if ALT.startswith('+'):
                    ALT = REF + ALT[1:]
                    var_type = 'INS'
                elif ALT.startswith('-'):
                    REF += ALT[1:]
                    ALT = fields[2]
                    var_type = 'DEL'
                QUAL = '.'
                FILTER = 'PASS' if fields[13] == 'TRUE' else 'FAIL'
                INFO = 'DP='+fields[11]
                FORMAT = 'GT:REF_DP:REF_RV:REF_QUAL:ALT_DP:ALT_RV:ALT_QUAL:ALT_FREQ'
                SAMPLE = ':'.join(['1'] + fields[4:11])
                oline = '\t'.join([CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,SAMPLE])+'\n'

                writeLine = True
                if passOnly and FILTER != 'PASS':
                    writeLine = False
                if float(fields[10]) < minAF:
                    writeLine = False
                # A variant is remembered the first time it is seen, whether or
                # not that first occurrence passed the filters above.
                varKey = (CHROM,POS,REF,ALT)
                if varKey in seenVars:
                    writeLine = False
                else:
                    seenVars.add(varKey)
                if writeLine:
                    varCountDict[var_type] += 1
                    fout.write(oline)

    ## Print variant counts to pass to MultiQC
    varCountList = [(k, str(v)) for k, v in sorted(varCountDict.items())]
    print('\t'.join(['sample'] + [x[0] for x in varCountList]))
    print('\t'.join([filename] + [x[1] for x in varCountList]))
96 | |
def main(args=None):
    """Command-line entry point: parse the arguments and run the conversion.

    :param args: optional list of argument strings, forwarded to
        ``parse_args``.
    """
    options = parse_args(args)
    ivar_variants_to_vcf(
        options.FILE_IN,
        options.FILE_OUT,
        options.PASS_ONLY,
        options.MIN_ALLELE_FREQ,
    )
100 | |
101 | |
if __name__ == '__main__':
    # Run the converter only when executed as a script; whatever main()
    # returns is handed to sys.exit as the process exit status.
    exit_status = main()
    sys.exit(exit_status)