annotate gff_to_prot.py @ 2:a7febbfe3df3 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit 9ffba16c072c7dafe4ef0b70cd2ef166fbe903d0-dirty
author iuc
date Thu, 28 Feb 2019 09:05:27 -0500
parents
children dfd652f412bd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
a7febbfe3df3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit 9ffba16c072c7dafe4ef0b70cd2ef166fbe903d0-dirty
iuc
parents:
diff changeset
1 #!/usr/bin/env python
a7febbfe3df3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit 9ffba16c072c7dafe4ef0b70cd2ef166fbe903d0-dirty
iuc
parents:
diff changeset
2 import sys
a7febbfe3df3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit 9ffba16c072c7dafe4ef0b70cd2ef166fbe903d0-dirty
iuc
parents:
diff changeset
3 import os
a7febbfe3df3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit 9ffba16c072c7dafe4ef0b70cd2ef166fbe903d0-dirty
iuc
parents:
diff changeset
4 import csv
a7febbfe3df3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit 9ffba16c072c7dafe4ef0b70cd2ef166fbe903d0-dirty
iuc
parents:
diff changeset
5
a7febbfe3df3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit 9ffba16c072c7dafe4ef0b70cd2ef166fbe903d0-dirty
iuc
parents:
diff changeset
def get_description(line, parent):
    """Return the ``Note`` attribute of a CDS row that belongs to *parent*.

    Parameters:
        line: one raw line of a GFF file (tab separated, may still carry its
            trailing newline).
        parent: the ``ID`` of the gene the CDS is expected to reference via
            its ``Parent`` attribute.

    Returns:
        The value of the ``Note`` attribute when *line* is a ``CDS`` feature
        whose ``Parent`` equals *parent*; ``'-'`` in every other case
        (non-CDS feature, different or missing parent, missing note, comment
        line, blank line, or a row with fewer than nine columns).
    """
    # Drop only the line terminator: it would otherwise stay glued to the
    # last attribute value and break the Parent comparison / pollute the note.
    cols = line.rstrip('\r\n').split('\t')

    # Comment lines, blank lines and truncated rows have no attribute column.
    if len(cols) < 9 or cols[2] != "CDS":
        return '-'

    labels = {}
    for pair in cols[8].split(";"):
        # partition() splits on the first '=' only, so values that contain
        # '=' survive, and fragments without '=' (e.g. the empty string left
        # by a trailing ';') are skipped instead of raising ValueError.
        key, sep, value = pair.partition('=')
        if not sep:
            continue
        labels[key.strip()] = value.strip()

    if labels.get("Parent") == parent:
        return labels.get("Note", '-')
    return '-'
a7febbfe3df3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit 9ffba16c072c7dafe4ef0b70cd2ef166fbe903d0-dirty
iuc
parents:
diff changeset
16
a7febbfe3df3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit 9ffba16c072c7dafe4ef0b70cd2ef166fbe903d0-dirty
iuc
parents:
diff changeset
17
a7febbfe3df3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit 9ffba16c072c7dafe4ef0b70cd2ef166fbe903d0-dirty
iuc
parents:
diff changeset
def _parse_gff_attributes(field):
    """Parse a GFF attribute column (``key=value;key=value``) into a dict."""
    labels = {}
    for pair in field.split(";"):
        # Split on the first '=' only; skip fragments without one (such as
        # the empty string produced by a trailing ';').
        key, sep, value = pair.partition('=')
        if not sep:
            continue
        labels[key.strip()] = value.strip()
    return labels


def convert_to_prot_table(fname, output_name):
    """Convert the ``gene`` features of a GFF file into a prot_table file.

    One tab-separated row is written per ``gene`` feature with the columns
    ``description, start, end, strand, length, '-', '-', gene name,
    locus tag, '-'``.  The description is taken from the ``Note`` of the row
    that immediately follows the gene, provided that row is a ``CDS`` whose
    ``Parent`` is the gene's ``ID``; otherwise it is ``'-'``.

    Parameters:
        fname: path of the GFF file to read.
        output_name: path of the prot_table file to write (overwritten).

    Raises:
        KeyError: a ``gene`` row has no ``locus_tag`` attribute.
        ValueError: a ``gene`` row has non-integer start/end coordinates.
    """
    # Read everything up front: the description of a gene is looked up on
    # the following line, so random access by index is needed.
    with open(fname) as gff_file:
        lines = gff_file.readlines()

    with open(output_name, 'w') as output_file:
        writer = csv.writer(output_file, delimiter='\t')
        for i, raw_line in enumerate(lines):
            line = raw_line.rstrip('\r\n')
            if line.startswith('#'):
                continue

            cols = line.split('\t')
            if len(cols) < 9:
                print("Ignoring invalid row with entries: {0}".format(cols))
                continue
            # Only gene rows produce output; CDS rows are consulted solely
            # through get_description() below, everything else is ignored.
            if cols[2] != "gene":
                continue

            start = int(cols[3])
            end = int(cols[4])
            strand = cols[6].strip()
            # Span of the gene divided by three, i.e. its approximate length
            # in codons / amino acids.
            length = int(abs(end - start) / 3)

            labels = _parse_gff_attributes(cols[8])
            locus_tag = labels["locus_tag"].strip()  # error out if not found
            gene = labels.get('Name', '')

            if (i + 1) < len(lines):
                desc = get_description(lines[i + 1], labels.get("ID", ""))
            else:
                desc = '-'

            writer.writerow(
                [desc, start, end, strand, length, '-', '-', gene, locus_tag, '-']
            )
a7febbfe3df3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit 9ffba16c072c7dafe4ef0b70cd2ef166fbe903d0-dirty
iuc
parents:
diff changeset
48
a7febbfe3df3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit 9ffba16c072c7dafe4ef0b70cd2ef166fbe903d0-dirty
iuc
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: <gff filename> <output filename>.
    usage_string = "Usage: python gff-prot-converter.py <gff filename> <output filename>"

    # Problems are reported on stderr with a non-zero exit status so that
    # calling scripts and workflow wrappers can detect the failure.
    if len(sys.argv) < 3:
        sys.stderr.write(usage_string + "\n")
        sys.exit(1)

    file_name = sys.argv[1]
    if not os.path.exists(file_name):
        sys.stderr.write("File not found. Exiting...\n")
        sys.stderr.write(usage_string + "\n")
        sys.exit(1)

    convert_to_prot_table(file_name, sys.argv[2])