Mercurial > repos > iuc > transit_resampling
changeset 2:173bf0f6ca1e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit 9ffba16c072c7dafe4ef0b70cd2ef166fbe903d0-dirty
| author | iuc |
|---|---|
| date | Thu, 28 Feb 2019 09:04:07 -0500 |
| parents | 1d1ca6b30fbb |
| children | b33af081b02e |
| files | gff_to_prot.py macros.xml |
| diffstat | 2 files changed, 61 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# gff_to_prot.py -- file added by this changeset.
# Original diff header:
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/gff_to_prot.py Thu Feb 28 09:04:07 2019 -0500
#   @@ -0,0 +1,60 @@
"""Convert a GFF3 annotation file into a tab-separated protein table.

One output row is written per ``gene`` feature found in the input.  Each row
has ten columns: description, start, end, strand, approximate product length
in amino acids, two ``-`` placeholders, gene name, locus tag and a final
``-`` placeholder.
"""
import sys
import os
import csv


def _parse_attributes(field):
    """Parse a GFF3 attribute column (``key=value;key=value``) into a dict.

    Keys and values are stripped of surrounding whitespace.  Entries without
    an ``=`` (for example the empty string left by a trailing ``;``) are
    ignored, and only the first ``=`` separates key from value so that values
    containing ``=`` are kept intact.
    """
    labels = {}
    for pair in field.split(';'):
        key, sep, value = pair.partition('=')
        if not sep:
            continue
        labels[key.strip()] = value.strip()
    return labels


def get_description(line, parent):
    """Return the ``Note`` of a CDS line that belongs to ``parent``.

    line   -- one raw line of the GFF file (a trailing newline is allowed).
    parent -- the ``ID`` of the gene the CDS is expected to belong to.

    Returns ``'-'`` when the line is not a well-formed CDS row, when its
    ``Parent`` attribute is missing or differs from ``parent``, or when it
    carries no ``Note`` attribute.
    """
    # Drop the line terminator so it cannot leak into the last attribute.
    cols = line.rstrip('\r\n').split('\t')
    if len(cols) < 9 or cols[2] != "CDS":
        return '-'
    labels = _parse_attributes(cols[8])
    if labels.get("Parent") == parent:
        return labels.get("Note", '-')
    return '-'


def convert_to_prot_table(fname, output_name):
    """Write a protein table for every ``gene`` feature of a GFF3 file.

    fname       -- path of the GFF3 file to read.
    output_name -- path of the tab-separated file to create or overwrite.

    Comment lines (starting with ``#``) and blank lines are skipped.  Rows
    with fewer than nine columns are reported on stdout and ignored.  Only
    ``gene`` rows produce output; the description is taken from the line
    immediately following the gene when that line is its CDS.

    Raises KeyError when a gene row has no ``locus_tag`` attribute and
    ValueError when its start or end coordinate is not an integer.
    """
    with open(fname) as gff_file:
        lines = gff_file.readlines()

    with open(output_name, 'w') as output_file:
        writer = csv.writer(output_file, delimiter='\t')
        for i, raw_line in enumerate(lines):
            line = raw_line.rstrip('\r\n')
            if not line.strip() or line.startswith('#'):
                continue
            cols = line.split('\t')
            if len(cols) < 9:
                print("Ignoring invalid row with entries: {0}".format(cols))
                continue
            if cols[2] != "gene":
                # Other feature types (region, CDS, ...) produce no row.
                continue

            start = int(cols[3])
            end = int(cols[4])
            strand = cols[6].strip()
            # Nucleotide span divided by three: the approximate length of
            # the gene product in amino acids.
            length = abs(end - start) // 3
            labels = _parse_attributes(cols[8])

            locus_tag = labels["locus_tag"]  # error out if not found
            gene = labels.get('Name', '')
            if i + 1 < len(lines):
                desc = get_description(lines[i + 1], labels.get("ID", ""))
            else:
                desc = '-'
            writer.writerow([desc, start, end, strand, length,
                             '-', '-', gene, locus_tag, '-'])


if __name__ == "__main__":
    usage_string = "Usage: python gff-prot-converter.py <gff filename> <output filename>"

    # sys.exit(<str>) prints the message to stderr and exits with status 1,
    # so a calling pipeline can detect the failure.
    if len(sys.argv) < 3:
        sys.exit(usage_string)
    file_name = sys.argv[1]
    if not os.path.exists(file_name):
        sys.exit("File not found. Exiting...\n" + usage_string)
    convert_to_prot_table(file_name, sys.argv[2])
--- a/macros.xml Wed Feb 27 10:14:26 2019 -0500 +++ b/macros.xml Thu Feb 28 09:04:07 2019 -0500 @@ -26,7 +26,7 @@ <xml name="inputs"> <param name="inputs" type="data" format="wig,tabular" multiple="true" label="Input .wig files" /> <yield /> - <param name="annotation" type="data" format="gff3" label="Input annotation" /> + <param name="annotation" type="data" format="gff3,tabular" label="Input annotation" /> </xml> <xml name="ignore_tas"> <param name="nterm" argument="-iN" type="float" value="0" min="0" max="1" label="Ignore TAs occuring at given fraction of the N terminus." />
