comparison genetrack_util.py @ 4:a952b6740fb9 draft

Uploaded
author greg
date Sat, 21 Nov 2015 08:57:17 -0500
parents 0368815ae4d5
children a7da50a23270
comparison
equal deleted inserted replaced
3:fd4daf02a338 4:a952b6740fb9
3 import numpy 3 import numpy
4 import re 4 import re
5 import subprocess 5 import subprocess
6 import sys 6 import sys
7 import tempfile 7 import tempfile
8
9 GFF_EXT = 'gff'
10 SCIDX_EXT = 'scidx'
8 11
9 ROMAN = ['0', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X', 12 ROMAN = ['0', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X',
10 'XI', 'XII', 'XIII', 'XIV', 'XV', 'XVI', 'XVII', 'XVIII', 'XIX', 'XX', 13 'XI', 'XII', 'XIII', 'XIV', 'XV', 'XVI', 'XVII', 'XVIII', 'XIX', 'XX',
11 'XXI', 'XXII', 'XXIII', 'XXIV', 'XXV', 'XXVI', 'XXVII', 'XXVIII', 'XXIX', 14 'XXI', 'XXII', 'XXIII', 'XXIV', 'XXV', 'XXVI', 'XXVII', 'XXVIII', 'XXIX',
12 'XXX'] 15 'XXX']
102 def is_valid(self, line): 105 def is_valid(self, line):
103 if len(line) not in [4, 5, 9]: 106 if len(line) not in [4, 5, 9]:
104 return False 107 return False
105 try: 108 try:
106 [int(i) for i in line[1:]] 109 [int(i) for i in line[1:]]
107 self.format = 'ssccidx' 110 self.format = SCIDX_EXT
108 return True 111 return True
109 except ValueError: 112 except ValueError:
110 try: 113 try:
111 if len(line) < 6: 114 if len(line) < 6:
112 return False 115 return False
113 [int(line[4]), int(line[5])] 116 [int(line[4]), int(line[5])]
114 self.format = 'gff' 117 self.format = GFF_EXT
115 return True 118 return True
116 except ValueError: 119 except ValueError:
117 return False 120 return False
118 121
119 def next_valid(self): 122 def next_valid(self):
128 if s > 0: 131 if s > 0:
129 # Skip initial line(s) of file 132 # Skip initial line(s) of file
130 pass 133 pass
131 134
132 def parse_line(self, line): 135 def parse_line(self, line):
133 if self.format == 'ssccidx': 136 if self.format == SCIDX_EXT:
134 return [int(line[1]), int(line[2]), int(line[3])] 137 return [int(line[1]), int(line[2]), int(line[3])]
135 else: 138 else:
136 return [int(line[3]), line[6], line[5]] 139 return [int(line[3]), line[6], line[5]]
137 140
138 def chromosome_name(self): 141 def chromosome_name(self):
168 # Don't retain reference anymore to save memory 171 # Don't retain reference anymore to save memory
169 del self.data 172 del self.data
170 return data 173 return data
171 174
172 def add_read(self, read): 175 def add_read(self, read):
173 if self.format == 'ssccidx': 176 if self.format == SCIDX_EXT:
174 self.data.append(read) 177 self.data.append(read)
175 else: 178 else:
176 index, strand, value = read 179 index, strand, value = read
177 if value == '' or value == '.': 180 if value == '' or value == '.':
178 value = 1 181 value = 1