Mercurial > repos > davidvanzessen > imgt_loader_igg
changeset 5:387fce4a1dd4 draft
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 07 Jul 2014 09:48:59 -0400 |
parents | 021d39f6bb0e |
children | 5b030e48b308 |
files | imgtconvert.py imgtconvert.sh |
diffstat | 2 files changed, 6 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/imgtconvert.py Fri Jun 06 04:34:44 2014 -0400
+++ b/imgtconvert.py Mon Jul 07 09:48:59 2014 -0400
@@ -47,9 +47,9 @@
 outFile = args.output
-fSummary = pd.read_csv(triplets[0][0], sep="\t")
-fSequence = pd.read_csv(triplets[0][1], sep="\t")
-fJunction = pd.read_csv(triplets[0][2], sep="\t")
+fSummary = pd.read_csv(triplets[0][0], sep="\t", low_memory=False)
+fSequence = pd.read_csv(triplets[0][1], sep="\t", low_memory=False)
+fJunction = pd.read_csv(triplets[0][2], sep="\t", low_memory=False)
 tmp = fSummary[["Sequence ID", "JUNCTION frame", "V-GENE and allele", "D-GENE and allele", "J-GENE and allele"]]
 tmp["CDR1 Seq"] = fSequence["CDR1-IMGT"]
@@ -186,5 +186,6 @@
 outFrame["VDJ Frame"] = tmp
 outFrame["CDR3 Length DNA"] = outFrame["CDR3 Seq DNA"].map(str).map(len)
 safeLength = lambda x: len(x) if type(x) == str else 0
-outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top D Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows?
+outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows?
+#outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top D Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows?
 outFrame.to_csv(outFile, sep="\t", index=False, index_label="index")
```