Mercurial > repos > davidvanzessen > mutation_analysis
comparison gene_identification.py @ 5:71a12810eff3 draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 23 Sep 2014 07:50:35 -0400 |
parents | 069419cccba4 |
children | 3f4b4ef46c7f |
comparison
equal
deleted
inserted
replaced
4:069419cccba4 | 5:71a12810eff3 |
---|---|
110 currentIDHits = hits[ID] | 110 currentIDHits = hits[ID] |
111 seq = dic[ID] | 111 seq = dic[ID] |
112 lastindex = 0 | 112 lastindex = 0 |
113 start = [0] * len(seq) | 113 start = [0] * len(seq) |
114 for i, regexp in enumerate(regularexpressions): #for every regular expression | 114 for i, regexp in enumerate(regularexpressions): #for every regular expression |
| | 115 relativeStartLocation = lastindex - (chunklength / 2) * i |
| | 116 if relativeStartLocation < 0 or relativeStartLocation >= len(seq): |
| | 117 break |
115 regex, hasVar = regexp | 118 regex, hasVar = regexp |
116 matches = regex.finditer(seq[lastindex:]) | 119 matches = regex.finditer(seq[lastindex:]) |
117 for match in matches: #for every match with the current regex, only uses the first hit | 120 for match in matches: #for every match with the current regex, only uses the first hit |
118 lastindex += match.start() | 121 lastindex += match.start() |
119 start[lastindex - chunklength / 2 * i] += 1 | 122 print ID |
| | 123 print lastindex |
| | 124 print chunklength |
| | 125 print i |
| | 126 print seq[lastindex:] |
| | 127 print start |
| | 128 print len(seq) |
| | 129 print relativeStartLocation |
| | 130 print "-------------------" |
| | 131 start[relativeStartLocation] += 1 |
120 if hasVar: #if the regex has a variable nt in it | 132 if hasVar: #if the regex has a variable nt in it |
121 chunkstart = chunklength / 2 * i #where in the reference does this chunk start | 133 chunkstart = chunklength / 2 * i #where in the reference does this chunk start |
122 chunkend = chunklength / 2 * i + chunklength #where in the reference does this chunk end | 134 chunkend = chunklength / 2 * i + chunklength #where in the reference does this chunk end |
123 if key == "ca": #just calculate the variable nt score for 'ca', cheaper | 135 if key == "ca": #just calculate the variable nt score for 'ca', cheaper |
124 currentIDHits["ca1"] += len([1 for x in ca1 if chunkstart <= x < chunkend and ca1[x] == seq[lastindex + x - chunkstart]]) | 136 currentIDHits["ca1"] += len([1 for x in ca1 if chunkstart <= x < chunkend and ca1[x] == seq[lastindex + x - chunkstart]]) |