# HG changeset patch # User davidvanzessen # Date 1411473035 14400 # Node ID 71a12810eff3647aa792301738e55bb6bf86f72b # Parent 069419cccba4c17300ffc3619b51593186e4bfdc Uploaded diff -r 069419cccba4 -r 71a12810eff3 gene_identification.py --- a/gene_identification.py Mon Sep 22 10:19:36 2014 -0400 +++ b/gene_identification.py Tue Sep 23 07:50:35 2014 -0400 @@ -112,11 +112,23 @@ lastindex = 0 start = [0] * len(seq) for i, regexp in enumerate(regularexpressions): #for every regular expression + relativeStartLocation = lastindex - (chunklength / 2) * i + if relativeStartLocation < 0 or relativeStartLocation >= len(seq): + break regex, hasVar = regexp matches = regex.finditer(seq[lastindex:]) for match in matches: #for every match with the current regex, only uses the first hit lastindex += match.start() - start[lastindex - chunklength / 2 * i] += 1 + print ID + print lastindex + print chunklength + print i + print seq[lastindex:] + print start + print len(seq) + print relativeStartLocation + print "-------------------" + start[relativeStartLocation] += 1 if hasVar: #if the regex has a variable nt in it chunkstart = chunklength / 2 * i #where in the reference does this chunk start chunkend = chunklength / 2 * i + chunklength #where in the reference does this chunk end