comparison gff_to_prot.py @ 5:dfd652f412bd draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit 73c6b2baf9dda26c6809a4f36582f7cbdb161ea1
author iuc
date Mon, 22 Apr 2019 14:39:34 -0400
parents a7febbfe3df3
children
comparison
equal deleted inserted replaced
4:b2f6cbdc5858 5:dfd652f412bd
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 import csv
3 import os
2 import sys 4 import sys
3 import os 5
4 import csv
5 6
6 def get_description(line, parent): 7 def get_description(line, parent):
7 cols = line.split('\t') 8 cols = line.split('\t')
8 labels = {} 9 labels = {}
9 for pair in cols[8].split(";"): 10 for pair in cols[8].split(";"):
20 output_file = open(output_name, 'w') 21 output_file = open(output_name, 'w')
21 writer = csv.writer(output_file, delimiter='\t') 22 writer = csv.writer(output_file, delimiter='\t')
22 lines = gff_file.readlines() 23 lines = gff_file.readlines()
23 gff_file.close() 24 gff_file.close()
24 for i, line in enumerate(lines): 25 for i, line in enumerate(lines):
25 lie = line.strip() 26 line = line.strip()
26 if line.startswith('#'): continue 27 if line.startswith('#'):
28 continue
27 cols = line.split('\t') 29 cols = line.split('\t')
28 if (len(cols) < 9): 30 if (len(cols) < 9):
29 print("Ignoring invalid row with entries: {0}".format(cols)) 31 print("Ignoring invalid row with entries: {0}".format(cols))
30 elif (cols[2]) == "region": continue 32 elif (cols[2]) == "region":
31 elif (cols[2]) == "CDS": continue 33 continue
34 elif (cols[2]) == "CDS":
35 continue
32 elif (cols[2]) == "gene": 36 elif (cols[2]) == "gene":
33 start = int(cols[3]) 37 start = int(cols[3])
34 end = int(cols[4]) 38 end = int(cols[4])
35 strand = cols[6].strip() 39 strand = cols[6].strip()
36 labels = {} 40 labels = {}
37 diff = int(abs(end - start)/3) ## What is this called? 41 diff = int(abs(end - start) / 3) # What is this called?
38 for pair in cols[8].split(";"): 42 for pair in cols[8].split(";"):
39 k, v = pair.split('=') 43 k, v = pair.split('=')
40 labels[k.strip()] = v.strip() 44 labels[k.strip()] = v.strip()
41 45
42 Rv = labels["locus_tag"].strip() # error out if not found 46 Rv = labels["locus_tag"].strip() # error out if not found
43 gene = labels.get('Name', '') 47 gene = labels.get('Name', '')
44 desc = get_description(lines[i + 1], labels.get("ID", "")) if (i + 1) < len(lines) else '-' 48 desc = get_description(lines[i + 1], labels.get("ID", "")) if (i + 1) < len(lines) else '-'
45 vals = [desc, start, end, strand, diff, '-', '-', gene, Rv, '-'] 49 vals = [desc, start, end, strand, diff, '-', '-', gene, Rv, '-']
46 writer.writerow(vals) 50 writer.writerow(vals)
47 output_file.close() 51 output_file.close()
52
48 53
49 if __name__ == "__main__": 54 if __name__ == "__main__":
50 usage_string = "Usage: python gff-prot-converter.py <gff filename> <output filename>" 55 usage_string = "Usage: python gff-prot-converter.py <gff filename> <output filename>"
51 56
52 if len(sys.argv) < 3: 57 if len(sys.argv) < 3: