rnaz_annotate: AnnotateRNAz.py comparison

comparison AnnotateRNAz.py @ 0:8c250234fe1d draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_team/rnaz commit d261ddb93500e1ea309845fa3989c87c6312583d-dirty

author	bgruening
date	Wed, 30 Jan 2019 03:57:11 -0500
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:8c250234fe1d
+# AnnotateRnaz.py ---
+#
+# Filename: AnnotateRnaz.py
+# Description:
+# Author: Joerg Fallmann
+# Maintainer:
+# Created: Sat Jan 26 12:45:25 2019 (+0100)
+# Version:
+# Package-Requires: ()
+# Last-Updated: Tue Jan 29 13:52:57 2019 (+0100)
+#           By: Joerg Fallmann
+#     Update #: 188
+# URL:
+# Doc URL:
+# Keywords:
+# Compatibility:
+#
+#
+# Commentary:
+# This script is a replacement for rnazAnnotate.pl
+# rnazAnnotate can not handle the output from version 2 adequatly
+# This script uses the bedtools API to fast intersect an annotation Bed
+# with output from RNAz
+# Change Log:
+#
+#
+#
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or (at
+# your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+# Code:
+#!/usr/bin/env python3
+import sys
+import glob
+import argparse
+from io import StringIO
+import gzip
+import traceback as tb
+import pybedtools
+import re
+import tempfile
+def parseargs():
+parser = argparse.ArgumentParser(description='Intersect RNAz output with Annotation from BED')
+parser.add_argument("-b", "--bed", type=str, help='Annotation BED file')
+parser.add_argument("-i", "--input", type=str, help='RNAz output')
+parser.add_argument("-o", "--bedout", type=str, help='Annotation BED output')
+parser.add_argument("-r", "--rnazout", type=str, help='Annotation rnaz output')
+return parser.parse_args()
+def annotate(bed, input, bedout, rnazout):
+try:
+pybedtools.set_tempdir('.')  # Make sure we do not write somewhere we are not supposed to
+anno = pybedtools.BedTool(bed)
+rnaz=readrnaz(input)
+tmpbed = pybedtools.BedTool(rnaztobed(rnaz), from_string=True)
+intersection = tmpbed.intersect(anno,wa=True,wb=True,s=True)  # intersect strand specific, keep all info on a and b files
+bedtornaz(intersection, rnaz, bedout, rnazout)
+return 1
+except Exception as err:
+exc_type, exc_value, exc_tb = sys.exc_info()
+tbe = tb.TracebackException(
+exc_type, exc_value, exc_tb,
+)
+print(''.join(tbe.format()),file=sys.stderr)
+def readin(file):
+try:
+if '.gz' in file:
+f = gzip.open(file,'rt')
+else:
+f = open(file,'rt')
+return f
+except Exception as err:
+exc_type, exc_value, exc_tb = sys.exc_info()
+tbe = tb.TracebackException(
+exc_type, exc_value, exc_tb,
+)
+print(''.join(tbe.format()),file=sys.stderr)
+def readrnaz(rnaz):
+try:
+toparse = readin(rnaz)
+tointersect = {}
+header = []
+for line in toparse:
+if '#' in line[0]:
+tointersect['header']=line.strip()
+line = re.sub('^#','',line)
+cont = line.strip().split('\t')
+foi = cont.index('seqID') # need to find which column contains seqID
+sf = cont.index('start') # need to find which column contains start
+ef = cont.index('end') # need to find which column contains end
+if 'strand' in cont:# need to find which column contains strand
+df = cont.index('strand')
+else:
+df = None
+else:
+content = line.strip().split('\t')
+newid=re.split('\.|\,|\s|\\|\/|\_', content[foi])[1]  # I can only hope that we have species.chromosome.whatever as annotation in aln or maf, if not this is hardly parseable
+if df:
+longid = '_'.join([newid, content[sf], content[ef], 'RNAzresult', '0', content[df]])
+tointersect[longid] = content
+else:
+longid = '_'.join([newid, content[sf], content[ef], 'RNAzresult', '0', '+'])
+tointersect[longid] = content
+longid = '_'.join([newid, content[sf], content[ef], 'RNAzresult', '0', '-'])
+tointersect[longid] = content
+return tointersect
+except Exception as err:
+exc_type, exc_value, exc_tb = sys.exc_info()
+tbe = tb.TracebackException(
+exc_type, exc_value, exc_tb,
+)
+print(''.join(tbe.format()),file=sys.stderr)
+def rnaztobed(rnaz):
+try:
+tmpbed = []
+for key in rnaz:
+if key != 'header':
+tmpbed.append('\t'.join(key.split('_')))
+return '\n'.join(tmpbed)
+except Exception as err:
+exc_type, exc_value, exc_tb = sys.exc_info()
+tbe = tb.TracebackException(
+exc_type, exc_value, exc_tb,
+)
+print(''.join(tbe.format()),file=sys.stderr)
+def bedtornaz(bed, rnaz, bedout, rnazout):
+try:
+b = open(bedout,'w')
+r = open(rnazout,'w')
+annotatedbed=[]
+annotatedrnaz=[]
+annotatedrnaz.append(str.join('\t',[rnaz['header'],'Annotation']))
+for line in open(bed.fn):
+out = line.strip().split("\t")
+annotatedbed.append(str.join('\t',out[0:3]+out[9:10]+out[4:6]))
+key = str.join('_',out[0:6])
+annotatedrnaz.append(str.join('\t',rnaz[key]+out[9:10]))
+print(str.join('\n', annotatedbed),file=b)
+print(str.join('\n', annotatedrnaz),file=r)
+except Exception as err:
+exc_type, exc_value, exc_tb = sys.exc_info()
+tbe = tb.TracebackException(
+exc_type, exc_value, exc_tb,
+)
+print(''.join(tbe.format()),file=sys.stderr)
+def closefile(file):
+try:
+file.close()
+except Exception as err:
+exc_type, exc_value, exc_tb = sys.exc_info()
+tbe = tb.TracebackException(
+exc_type, exc_value, exc_tb,
+)
+print(''.join(tbe.format()),file=sys.stderr)
+####################
+####    MAIN    ####
+####################
+if __name__ == '__main__':
+args=parseargs()
+annotate(args.bed, args.input, args.bedout, args.rnazout)
+######################################################################
+# AnnotateRnaz.py ends here

Mercurial > repos > bgruening > rnaz_annotate

comparison AnnotateRNAz.py @ 0:8c250234fe1d draft default tip