comparison imgt_loader.py @ 7:a9053212a462 draft

Uploaded
author davidvanzessen
date Mon, 05 Jan 2015 09:30:08 -0500
parents 3287f7b9c47d
children
comparison
equal deleted inserted replaced
6:8b46fca04595 7:a9053212a462
126 126
127 outFrame["Top V Gene"] = outFrame["Top V Gene"].apply(lambda x: filterGenes(x, vPattern)) 127 outFrame["Top V Gene"] = outFrame["Top V Gene"].apply(lambda x: filterGenes(x, vPattern))
128 outFrame["Top D Gene"] = outFrame["Top D Gene"].apply(lambda x: filterGenes(x, dPattern)) 128 outFrame["Top D Gene"] = outFrame["Top D Gene"].apply(lambda x: filterGenes(x, dPattern))
129 outFrame["Top J Gene"] = outFrame["Top J Gene"].apply(lambda x: filterGenes(x, jPattern)) 129 outFrame["Top J Gene"] = outFrame["Top J Gene"].apply(lambda x: filterGenes(x, jPattern))
130 130
131 print outFrame
132 131
133 tmp = outFrame["VDJ Frame"] 132 tmp = outFrame["VDJ Frame"]
134 tmp = tmp.replace("in-frame", "In-frame") 133 tmp = tmp.replace("in-frame", "In-frame")
135 tmp = tmp.replace("null", "Out-of-frame") 134 tmp = tmp.replace("null", "Out-of-frame")
136 tmp = tmp.replace("out-of-frame", "Out-of-frame") 135 tmp = tmp.replace("out-of-frame", "Out-of-frame")
137 outFrame["VDJ Frame"] = tmp 136 outFrame["VDJ Frame"] = tmp
138 outFrame["CDR3 Length DNA"] = outFrame["CDR3 Seq DNA"].map(str).map(len) 137 outFrame["CDR3 Length DNA"] = outFrame["CDR3 Seq DNA"].map(str).map(len)
139 safeLength = lambda x: len(x) if type(x) == str else 0 138 safeLength = lambda x: len(x) if type(x) == str else 0
140 outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows? 139 #outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows?
141 #outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top D Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows? 140 #outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top D Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows?
142 outFrame.to_csv(outFile, sep="\t", index=False, index_label="index") 141 outFrame.to_csv(outFile, sep="\t", index=False, index_label="index")