# HG changeset patch # User davidvanzessen # Date 1428502486 14400 # Node ID 362ef99f94059408c41be706c66be20d95072118 # Parent c9c95b96b7cc04f214ecc9e9be6afcd809b0fd7a Uploaded diff -r c9c95b96b7cc -r 362ef99f9405 mutation_analysis.py --- a/mutation_analysis.py Wed Apr 08 10:14:38 2015 -0400 +++ b/mutation_analysis.py Wed Apr 08 10:14:46 2015 -0400 @@ -53,12 +53,14 @@ mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] + print mutationdic[ID + "_FR1"] + mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] IDlist += [ID] -AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1) +AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1) #[4] is the position of the AA mutation, None if silent for mutation in mutationList: if mutation[4]: #if non silent mutation @@ -109,10 +111,34 @@ WRCY = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]] WA = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[ataIndex].split("|") if x]] TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] - RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) - WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) - WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) - TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) + RGYWCount[ID], WRCYCount[ID], WACount[ID], TWCount[ID] = 0,0,0,0 + for (x,y,z) in RGYW: #RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) + if not z or z == "CDR3": + continue + in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) + if in_mutations > 0: + RGYWCount[ID] += 1.0 / in_mutations + + for (x,y,z) in WRCY: #WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) + if not z or z == "CDR3": + continue + in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) + if in_mutations > 0: + WRCYCount[ID] += 1.0 / in_mutations + + for (x,y,z) in WA: #WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) + if not z or z == "CDR3": + continue + in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) + if in_mutations > 0: + WACount[ID] += 1.0 / in_mutations + + for (x,y,z) in TW: #TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) + if not z or z == "CDR3": + continue + in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) + if in_mutations > 0: + TWCount[ID] += 1.0 / in_mutations @@ -136,12 +162,12 @@ if valuedic[gene] is 0: o.write(",0,0,0") else: - x = sum([curr[x] for x in [y for y,z in genedic.iteritems() if geneMatcher.match(z)]]) + x = int(round(sum([curr[x] for x in [y for y,z in genedic.iteritems() if geneMatcher.match(z)]]))) y = valuedic[gene] z = str(round(x / float(valuedic[gene]) * 100, 1)) o.write("," + str(x) + "," + str(y) + "," + z) #for total - x = sum([y for x,y in curr.iteritems()]) + x = int(round(sum([y for x,y in curr.iteritems()]))) y = valuedic["total"] z = str(round(x / float(valuedic["total"]) * 100, 1)) o.write("," + str(x) + "," + str(y) + "," + z + "\n")