changeset 2:173bf0f6ca1e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit 9ffba16c072c7dafe4ef0b70cd2ef166fbe903d0-dirty
author iuc
date Thu, 28 Feb 2019 09:04:07 -0500
parents 1d1ca6b30fbb
children b33af081b02e
files gff_to_prot.py macros.xml
diffstat 2 files changed, 61 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gff_to_prot.py	Thu Feb 28 09:04:07 2019 -0500
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+import sys
+import os
+import csv
+
+def get_description(line, parent):
+    """Return the Note attribute of a GFF3 CDS row whose Parent is `parent`, else '-'."""
+    cols = line.rstrip('\r\n').split('\t')
+    if len(cols) < 9 or cols[2] != "CDS":
+        return '-'  # short/comment rows and non-CDS features carry no description
+    # partition() tolerates empty pairs (trailing ';') and '=' inside values;
+    # stripping drops the line ending that otherwise sticks to the last value.
+    pairs = (pair.partition('=') for pair in cols[8].split(';'))
+    labels = {k.strip(): v.strip() for k, _, v in pairs}
+    return labels.get("Note", '-') if labels.get("Parent") == parent else '-'
+
+
+def convert_to_prot_table(fname, output_name):
+    """Write one tab-separated prot_table row per `gene` feature of a GFF3 file.
+
+    fname is the GFF3 input path and output_name the table path to create.
+    Columns: description, start, end, strand, length, '-', '-', name, locus tag, '-'.
+    The description comes from the row directly after the gene (see get_description).
+    Raises KeyError when a gene row has no locus_tag attribute.
+    """
+    with open(fname) as gff_file:
+        lines = gff_file.readlines()  # kept in memory: each gene peeks at the next row
+    with open(output_name, 'w') as output_file:  # closed even if a row fails to parse
+        writer = csv.writer(output_file, delimiter='\t')
+        for i, line in enumerate(lines):
+            if line.startswith('#'):
+                continue
+            cols = line.split('\t')
+            if len(cols) < 9:
+                print("Ignoring invalid row with entries: {0}".format(cols))
+                continue
+            if cols[2] != "gene":
+                continue  # region, CDS and every other feature type emit no row
+            start, end, strand = int(cols[3]), int(cols[4]), cols[6].strip()
+            diff = abs(end - start) // 3  # whole codons spanned; presumably the length column
+            # partition() tolerates empty pairs (trailing ';') and '=' inside values
+            pairs = (pair.partition('=') for pair in cols[8].split(';'))
+            labels = {k.strip(): v.strip() for k, _, v in pairs}
+            Rv = labels["locus_tag"]  # deliberately fatal: error out if not found
+            gene = labels.get('Name', '')
+            desc = get_description(lines[i + 1], labels.get("ID", "")) if i + 1 < len(lines) else '-'
+            writer.writerow([desc, start, end, strand, diff, '-', '-', gene, Rv, '-'])
+
+if __name__ == "__main__":
+    # Command-line entry point: convert <gff filename> into <output filename>.
+    usage_string = "Usage: python gff_to_prot.py <gff filename> <output filename>"
+
+    # sys.exit(<str>) prints the text to stderr and exits with status 1, so
+    # callers such as a workflow engine can tell a usage error from success.
+    if len(sys.argv) < 3:
+        sys.exit(usage_string)
+    file_name = sys.argv[1]
+    if not os.path.exists(file_name):
+        sys.exit("File not found. Exiting...\n" + usage_string)
+    convert_to_prot_table(file_name, sys.argv[2])
--- a/macros.xml	Wed Feb 27 10:14:26 2019 -0500
+++ b/macros.xml	Thu Feb 28 09:04:07 2019 -0500
@@ -26,7 +26,7 @@
     <xml name="inputs">
         <param name="inputs" type="data" format="wig,tabular" multiple="true" label="Input .wig files" />
         <yield />
-        <param name="annotation" type="data" format="gff3" label="Input annotation" />
+        <param name="annotation" type="data" format="gff3,tabular" label="Input annotation" />
     </xml>
     <xml name="ignore_tas">
         <param name="nterm" argument="-iN" type="float" value="0" min="0" max="1" label="Ignore TAs occuring at given fraction of the N terminus." />