Mercurial > repos > davidvanzessen > mutation_analysis
changeset 28:362ef99f9405 draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 08 Apr 2015 10:14:46 -0400 |
parents | c9c95b96b7cc |
children | 57d197f149c3 |
files | mutation_analysis.py |
diffstat | 1 files changed, 33 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/mutation_analysis.py Wed Apr 08 10:14:38 2015 -0400
+++ b/mutation_analysis.py Wed Apr 08 10:14:46 2015 -0400
@@ -53,12 +53,14 @@
     mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
     mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
+    print mutationdic[ID + "_FR1"]
+
     mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
     IDlist += [ID]
-AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1)
+AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1) #[4] is the position of the AA mutation, None if silent
 for mutation in mutationList:
     if mutation[4]: #if non silent mutation
@@ -109,10 +111,34 @@
     WRCY = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]]
     WA = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[ataIndex].split("|") if x]]
     TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]]
-    RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
-    WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
-    WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
-    TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
+    RGYWCount[ID], WRCYCount[ID], WACount[ID], TWCount[ID] = 0,0,0,0
+    for (x,y,z) in RGYW: #RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
+        if not z or z == "CDR3":
+            continue
+        in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
+        if in_mutations > 0:
+            RGYWCount[ID] += 1.0 / in_mutations
+
+    for (x,y,z) in WRCY: #WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
+        if not z or z == "CDR3":
+            continue
+        in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
+        if in_mutations > 0:
+            WRCYCount[ID] += 1.0 / in_mutations
+
+    for (x,y,z) in WA: #WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
+        if not z or z == "CDR3":
+            continue
+        in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
+        if in_mutations > 0:
+            WACount[ID] += 1.0 / in_mutations
+
+    for (x,y,z) in TW: #TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
+        if not z or z == "CDR3":
+            continue
+        in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
+        if in_mutations > 0:
+            TWCount[ID] += 1.0 / in_mutations
@@ -136,12 +162,12 @@
         if valuedic[gene] is 0:
             o.write(",0,0,0")
         else:
-            x = sum([curr[x] for x in [y for y,z in genedic.iteritems() if geneMatcher.match(z)]])
+            x = int(round(sum([curr[x] for x in [y for y,z in genedic.iteritems() if geneMatcher.match(z)]])))
             y = valuedic[gene]
             z = str(round(x / float(valuedic[gene]) * 100, 1))
             o.write("," + str(x) + "," + str(y) + "," + z)
     #for total
-    x = sum([y for x,y in curr.iteritems()])
+    x = int(round(sum([y for x,y in curr.iteritems()])))
     y = valuedic["total"]
     z = str(round(x / float(valuedic["total"]) * 100, 1))
     o.write("," + str(x) + "," + str(y) + "," + z + "\n")
```