Mercurial > repos > bgruening > rnaz_annotate
comparison AnnotateRNAz.py @ 0:8c250234fe1d draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_team/rnaz commit d261ddb93500e1ea309845fa3989c87c6312583d-dirty
| author | bgruening |
|---|---|
| date | Wed, 30 Jan 2019 03:57:11 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:8c250234fe1d |
|---|---|
| 1 # AnnotateRnaz.py --- | |
| 2 # | |
| 3 # Filename: AnnotateRnaz.py | |
| 4 # Description: | |
| 5 # Author: Joerg Fallmann | |
| 6 # Maintainer: | |
| 7 # Created: Sat Jan 26 12:45:25 2019 (+0100) | |
| 8 # Version: | |
| 9 # Package-Requires: () | |
| 10 # Last-Updated: Tue Jan 29 13:52:57 2019 (+0100) | |
| 11 # By: Joerg Fallmann | |
| 12 # Update #: 188 | |
| 13 # URL: | |
| 14 # Doc URL: | |
| 15 # Keywords: | |
| 16 # Compatibility: | |
| 17 # | |
| 18 # | |
| 19 | |
| 20 # Commentary: | |
| 21 # This script is a replacement for rnazAnnotate.pl | |
| 22 # rnazAnnotate can not handle the output from version 2 adequatly | |
| 23 # This script uses the bedtools API to fast intersect an annotation Bed | |
| 24 # with output from RNAz | |
| 25 | |
| 26 # Change Log: | |
| 27 # | |
| 28 # | |
| 29 # | |
| 30 # | |
| 31 # This program is free software: you can redistribute it and/or modify | |
| 32 # it under the terms of the GNU General Public License as published by | |
| 33 # the Free Software Foundation, either version 3 of the License, or (at | |
| 34 # your option) any later version. | |
| 35 # | |
| 36 # This program is distributed in the hope that it will be useful, but | |
| 37 # WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 38 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 39 # General Public License for more details. | |
| 40 # | |
| 41 # You should have received a copy of the GNU General Public License | |
| 42 # along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. | |
| 43 # | |
| 44 # | |
| 45 | |
| 46 # Code: | |
| 47 | |
| 48 #!/usr/bin/env python3 | |
| 49 | |
| 50 import sys | |
| 51 import glob | |
| 52 import argparse | |
| 53 from io import StringIO | |
| 54 import gzip | |
| 55 import traceback as tb | |
| 56 import pybedtools | |
| 57 import re | |
| 58 import tempfile | |
| 59 | |
| 60 def parseargs(): | |
| 61 parser = argparse.ArgumentParser(description='Intersect RNAz output with Annotation from BED') | |
| 62 parser.add_argument("-b", "--bed", type=str, help='Annotation BED file') | |
| 63 parser.add_argument("-i", "--input", type=str, help='RNAz output') | |
| 64 parser.add_argument("-o", "--bedout", type=str, help='Annotation BED output') | |
| 65 parser.add_argument("-r", "--rnazout", type=str, help='Annotation rnaz output') | |
| 66 return parser.parse_args() | |
| 67 | |
| 68 def annotate(bed, input, bedout, rnazout): | |
| 69 try: | |
| 70 | |
| 71 pybedtools.set_tempdir('.') # Make sure we do not write somewhere we are not supposed to | |
| 72 anno = pybedtools.BedTool(bed) | |
| 73 rnaz=readrnaz(input) | |
| 74 tmpbed = pybedtools.BedTool(rnaztobed(rnaz), from_string=True) | |
| 75 | |
| 76 intersection = tmpbed.intersect(anno,wa=True,wb=True,s=True) # intersect strand specific, keep all info on a and b files | |
| 77 | |
| 78 bedtornaz(intersection, rnaz, bedout, rnazout) | |
| 79 | |
| 80 return 1 | |
| 81 | |
| 82 except Exception as err: | |
| 83 exc_type, exc_value, exc_tb = sys.exc_info() | |
| 84 tbe = tb.TracebackException( | |
| 85 exc_type, exc_value, exc_tb, | |
| 86 ) | |
| 87 print(''.join(tbe.format()),file=sys.stderr) | |
| 88 | |
| 89 def readin(file): | |
| 90 try: | |
| 91 if '.gz' in file: | |
| 92 f = gzip.open(file,'rt') | |
| 93 else: | |
| 94 f = open(file,'rt') | |
| 95 return f | |
| 96 | |
| 97 except Exception as err: | |
| 98 exc_type, exc_value, exc_tb = sys.exc_info() | |
| 99 tbe = tb.TracebackException( | |
| 100 exc_type, exc_value, exc_tb, | |
| 101 ) | |
| 102 print(''.join(tbe.format()),file=sys.stderr) | |
| 103 | |
| 104 def readrnaz(rnaz): | |
| 105 try: | |
| 106 toparse = readin(rnaz) | |
| 107 tointersect = {} | |
| 108 header = [] | |
| 109 for line in toparse: | |
| 110 if '#' in line[0]: | |
| 111 tointersect['header']=line.strip() | |
| 112 line = re.sub('^#','',line) | |
| 113 cont = line.strip().split('\t') | |
| 114 foi = cont.index('seqID') # need to find which column contains seqID | |
| 115 sf = cont.index('start') # need to find which column contains start | |
| 116 ef = cont.index('end') # need to find which column contains end | |
| 117 if 'strand' in cont:# need to find which column contains strand | |
| 118 df = cont.index('strand') | |
| 119 else: | |
| 120 df = None | |
| 121 else: | |
| 122 content = line.strip().split('\t') | |
| 123 newid=re.split('\.|\,|\s|\\|\/|\_', content[foi])[1] # I can only hope that we have species.chromosome.whatever as annotation in aln or maf, if not this is hardly parseable | |
| 124 if df: | |
| 125 longid = '_'.join([newid, content[sf], content[ef], 'RNAzresult', '0', content[df]]) | |
| 126 tointersect[longid] = content | |
| 127 else: | |
| 128 longid = '_'.join([newid, content[sf], content[ef], 'RNAzresult', '0', '+']) | |
| 129 tointersect[longid] = content | |
| 130 longid = '_'.join([newid, content[sf], content[ef], 'RNAzresult', '0', '-']) | |
| 131 tointersect[longid] = content | |
| 132 | |
| 133 return tointersect | |
| 134 | |
| 135 except Exception as err: | |
| 136 exc_type, exc_value, exc_tb = sys.exc_info() | |
| 137 tbe = tb.TracebackException( | |
| 138 exc_type, exc_value, exc_tb, | |
| 139 ) | |
| 140 print(''.join(tbe.format()),file=sys.stderr) | |
| 141 | |
| 142 | |
| 143 def rnaztobed(rnaz): | |
| 144 try: | |
| 145 tmpbed = [] | |
| 146 for key in rnaz: | |
| 147 if key != 'header': | |
| 148 tmpbed.append('\t'.join(key.split('_'))) | |
| 149 | |
| 150 return '\n'.join(tmpbed) | |
| 151 | |
| 152 except Exception as err: | |
| 153 exc_type, exc_value, exc_tb = sys.exc_info() | |
| 154 tbe = tb.TracebackException( | |
| 155 exc_type, exc_value, exc_tb, | |
| 156 ) | |
| 157 print(''.join(tbe.format()),file=sys.stderr) | |
| 158 | |
| 159 def bedtornaz(bed, rnaz, bedout, rnazout): | |
| 160 try: | |
| 161 b = open(bedout,'w') | |
| 162 r = open(rnazout,'w') | |
| 163 | |
| 164 annotatedbed=[] | |
| 165 annotatedrnaz=[] | |
| 166 annotatedrnaz.append(str.join('\t',[rnaz['header'],'Annotation'])) | |
| 167 for line in open(bed.fn): | |
| 168 out = line.strip().split("\t") | |
| 169 annotatedbed.append(str.join('\t',out[0:3]+out[9:10]+out[4:6])) | |
| 170 key = str.join('_',out[0:6]) | |
| 171 annotatedrnaz.append(str.join('\t',rnaz[key]+out[9:10])) | |
| 172 | |
| 173 print(str.join('\n', annotatedbed),file=b) | |
| 174 print(str.join('\n', annotatedrnaz),file=r) | |
| 175 | |
| 176 except Exception as err: | |
| 177 exc_type, exc_value, exc_tb = sys.exc_info() | |
| 178 tbe = tb.TracebackException( | |
| 179 exc_type, exc_value, exc_tb, | |
| 180 ) | |
| 181 print(''.join(tbe.format()),file=sys.stderr) | |
| 182 | |
| 183 | |
| 184 def closefile(file): | |
| 185 try: | |
| 186 file.close() | |
| 187 | |
| 188 except Exception as err: | |
| 189 exc_type, exc_value, exc_tb = sys.exc_info() | |
| 190 tbe = tb.TracebackException( | |
| 191 exc_type, exc_value, exc_tb, | |
| 192 ) | |
| 193 print(''.join(tbe.format()),file=sys.stderr) | |
| 194 | |
| 195 | |
| 196 | |
| 197 | |
| 198 #################### | |
| 199 #### MAIN #### | |
| 200 #################### | |
| 201 if __name__ == '__main__': | |
| 202 args=parseargs() | |
| 203 annotate(args.bed, args.input, args.bedout, args.rnazout) | |
| 204 ###################################################################### | |
| 205 # AnnotateRnaz.py ends here |
