Mercurial > repos > iuc > transit_resampling
comparison gff_to_prot.py @ 2:173bf0f6ca1e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit 9ffba16c072c7dafe4ef0b70cd2ef166fbe903d0-dirty
| author | iuc |
|---|---|
| date | Thu, 28 Feb 2019 09:04:07 -0500 |
| parents | |
| children | 3fcb70c1ca78 |
comparison
equal
deleted
inserted
replaced
| 1:1d1ca6b30fbb | 2:173bf0f6ca1e |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import sys | |
| 3 import os | |
| 4 import csv | |
| 5 | |
def get_description(line, parent):
    """Return the ``Note`` attribute of a CDS row that belongs to ``parent``.

    ``line`` is one raw, tab-separated GFF row (it may still carry its line
    ending).  ``parent`` is the ``ID`` of the gene whose description is
    wanted.

    Returns the value of the row's ``Note`` attribute when the row's feature
    type (third column) is ``CDS`` and its ``Parent`` attribute equals
    ``parent``.  In every other case -- a different feature type, a row with
    fewer than nine columns (comment, blank line), a missing ``Parent`` or a
    missing ``Note`` -- the placeholder ``'-'`` is returned.
    """
    # Drop the line ending first; otherwise the last attribute value keeps a
    # trailing newline and a final ``Parent=...`` never matches ``parent``.
    cols = line.rstrip('\r\n').split('\t')
    if len(cols) < 9 or cols[2] != "CDS":
        return '-'

    labels = {}
    for pair in cols[8].split(';'):
        # partition() splits on the first '=' only, so values containing '='
        # stay intact and empty fragments (trailing ';') are skipped instead
        # of raising ValueError.
        key, sep, value = pair.partition('=')
        if not sep:
            continue
        labels[key.strip()] = value.strip()

    if labels.get("Parent") == parent:
        return labels.get("Note", '-')
    return '-'
| 16 | |
| 17 | |
def convert_to_prot_table(fname, output_name):
    """Convert the ``gene`` rows of a GFF file into a tab-separated table.

    ``fname`` is the path of the GFF file to read and ``output_name`` the
    path of the table to write (overwritten if it exists).

    One output row is written per ``gene`` feature, with the columns:
    description, start, end, strand, ``abs(end - start) // 3``, '-', '-',
    gene name, locus tag, '-'.  The description comes from
    ``get_description`` applied to the row that directly follows the gene
    row; it is '-' when the gene is the last row of the file.

    Comment rows (starting with '#') and blank rows are skipped silently,
    rows with fewer than nine columns are reported on stdout and skipped,
    and all other feature types are ignored.

    Raises ``KeyError`` when a gene row has no ``locus_tag`` attribute and
    ``ValueError`` when its start or end column is not an integer.
    """
    # Read everything up front: the description lookup needs the next row.
    with open(fname) as gff_file:
        lines = gff_file.readlines()

    # The context manager closes the output even if a row fails to parse.
    with open(output_name, 'w') as output_file:
        writer = csv.writer(output_file, delimiter='\t')
        for i, raw_line in enumerate(lines):
            line = raw_line.rstrip('\r\n')
            if not line.strip() or line.startswith('#'):
                continue

            cols = line.split('\t')
            if len(cols) < 9:
                print("Ignoring invalid row with entries: {0}".format(cols))
                continue
            if cols[2] != "gene":
                continue

            start = int(cols[3])
            end = int(cols[4])
            strand = cols[6].strip()
            # Span divided by three -- presumably the product length in
            # codons / amino acids (TODO confirm against the consumer).
            length = abs(end - start) // 3

            labels = {}
            for pair in cols[8].split(';'):
                # Split on the first '=' only and skip empty fragments, so a
                # trailing ';' or a value containing '=' does not abort the
                # whole conversion.
                key, sep, value = pair.partition('=')
                if not sep:
                    continue
                labels[key.strip()] = value.strip()

            locus_tag = labels["locus_tag"]  # deliberately errors out if absent
            gene = labels.get('Name', '')
            if i + 1 < len(lines):
                desc = get_description(lines[i + 1], labels.get("ID", ""))
            else:
                desc = '-'

            writer.writerow(
                [desc, start, end, strand, length, '-', '-', gene, locus_tag, '-'])
| 48 | |
if __name__ == "__main__":
    # Command-line entry point: <gff filename> <output filename>.
    # Problems are reported on stderr with a non-zero exit status so that a
    # calling shell or workflow engine can tell the conversion did not run.
    usage_string = "Usage: python {0} <gff filename> <output filename>".format(
        os.path.basename(sys.argv[0]))

    if len(sys.argv) < 3:
        sys.stderr.write(usage_string + "\n")
        sys.exit(1)

    file_name = sys.argv[1]
    # isfile() also rejects directories, which open() could not read anyway.
    if not os.path.isfile(file_name):
        sys.stderr.write("File not found. Exiting...\n")
        sys.stderr.write(usage_string + "\n")
        sys.exit(1)

    convert_to_prot_table(file_name, sys.argv[2])
