comparison gene_identification.py @ 5:71a12810eff3 draft

Uploaded
author davidvanzessen
date Tue, 23 Sep 2014 07:50:35 -0400
parents 069419cccba4
children 3f4b4ef46c7f
comparison
equal deleted inserted replaced
4:069419cccba4 5:71a12810eff3
110 currentIDHits = hits[ID] 110 currentIDHits = hits[ID]
111 seq = dic[ID] 111 seq = dic[ID]
112 lastindex = 0 112 lastindex = 0
113 start = [0] * len(seq) 113 start = [0] * len(seq)
114 for i, regexp in enumerate(regularexpressions): #for every regular expression 114 for i, regexp in enumerate(regularexpressions): #for every regular expression
115 relativeStartLocation = lastindex - (chunklength / 2) * i
116 if relativeStartLocation < 0 or relativeStartLocation >= len(seq):
117 break
115 regex, hasVar = regexp 118 regex, hasVar = regexp
116 matches = regex.finditer(seq[lastindex:]) 119 matches = regex.finditer(seq[lastindex:])
117 for match in matches: #for every match with the current regex, only uses the first hit 120 for match in matches: #for every match with the current regex, only uses the first hit
118 lastindex += match.start() 121 lastindex += match.start()
119 start[lastindex - chunklength / 2 * i] += 1 122 print ID
123 print lastindex
124 print chunklength
125 print i
126 print seq[lastindex:]
127 print start
128 print len(seq)
129 print relativeStartLocation
130 print "-------------------"
131 start[relativeStartLocation] += 1
120 if hasVar: #if the regex has a variable nt in it 132 if hasVar: #if the regex has a variable nt in it
121 chunkstart = chunklength / 2 * i #where in the reference does this chunk start 133 chunkstart = chunklength / 2 * i #where in the reference does this chunk start
122 chunkend = chunklength / 2 * i + chunklength #where in the reference does this chunk end 134 chunkend = chunklength / 2 * i + chunklength #where in the reference does this chunk end
123 if key == "ca": #just calculate the variable nt score for 'ca', cheaper 135 if key == "ca": #just calculate the variable nt score for 'ca', cheaper
124 currentIDHits["ca1"] += len([1 for x in ca1 if chunkstart <= x < chunkend and ca1[x] == seq[lastindex + x - chunkstart]]) 136 currentIDHits["ca1"] += len([1 for x in ca1 if chunkstart <= x < chunkend and ca1[x] == seq[lastindex + x - chunkstart]])