Mercurial > repos > davidvanzessen > argalaxy_tools
comparison igblast/igblast.r @ 3:c7c7000de220 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Thu, 30 Jul 2015 09:31:38 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 2:d8d61e65dfd5 | 3:c7c7000de220 |
|---|---|
# Convert a tab-separated IgBLAST summary table into a per-read table.
#
# Command line: Rscript igblast.r <infile> <outfile>
#   infile  - tab-separated table with a header row. The columns used are
#             X.reads_count (the name R gives a "#reads_count" header),
#             inFrame, noStop, vSegment, dSegment, jSegment, cdr1aa, cdr2aa,
#             cdr3aa and cdr3nt.
#   outfile - tab-separated table with one row per read: every input row is
#             repeated X.reads_count times and the rows are renumbered in ID.

# Test whether each element of a flag column equals `value`, ignoring case.
#
# read.table() guesses column types, so a column that holds only "true" and
# "false" arrives as a logical vector; comparing that with "true" via `==`
# never matches because TRUE is coerced to "TRUE". Lower-casing the character
# form accepts both representations. `%in%` never returns NA, and a missing
# column (NULL) yields logical(0), i.e. "no row matches".
flag_equals <- function(x, value) {
  tolower(as.character(x)) %in% value
}

# Return a sequence column as a character vector with missing values as "".
#
# A column that is blank in every row is read as logical NA; nchar(NA) is 2
# and NA == "" is NA, so missing values are normalised to the empty string
# before lengths are computed or emptiness is tested.
as_sequence <- function(x) {
  x <- as.character(x)
  x[is.na(x)] <- ""
  x
}

# Build the per-read output table from a parsed IgBLAST summary data frame.
#
# blasted: data frame holding the input columns listed at the top of the file.
#          inFrame and noStop may be absent, in which case every row is
#          labelled "Out-of-frame"; any other missing column is an error.
# Returns a data frame with the 16 output columns ("ID", "VDJ Frame", ...,
# "AA JUNCTION"), each input row repeated X.reads_count times and ID
# renumbered from 1. A zero-row input gives a zero-row result.
igblast_to_clonal <- function(blasted) {
  required <- c(
    "X.reads_count", "vSegment", "dSegment", "jSegment",
    "cdr1aa", "cdr2aa", "cdr3aa", "cdr3nt"
  )
  absent <- setdiff(required, names(blasted))
  if (length(absent) > 0) {
    stop(
      "input is missing required column(s): ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  n_rows <- nrow(blasted)

  # Frame label: default "Out-of-frame", refined only where the flags say so.
  in_frame <- flag_equals(blasted[["inFrame"]], "true")
  frame <- rep("Out-of-frame", n_rows)
  frame[in_frame & flag_equals(blasted[["noStop"]], "false")] <-
    "In-frame with stop codon"
  frame[in_frame & flag_equals(blasted[["noStop"]], "true")] <- "In-frame"

  cdr1_aa <- as_sequence(blasted[["cdr1aa"]])
  cdr2_aa <- as_sequence(blasted[["cdr2aa"]])
  cdr3_aa <- as_sequence(blasted[["cdr3aa"]])
  cdr3_nt <- as_sequence(blasted[["cdr3nt"]])

  # An empty CDR3 nucleotide sequence marks the CDR3 as not found.
  found_how <- rep("found", n_rows)
  found_how[cdr3_nt == ""] <- "NOT_FOUND"

  result <- data.frame(
    "ID" = seq_len(n_rows),
    "VDJ Frame" = frame,
    "Top V Gene" = blasted[["vSegment"]],
    "Top D Gene" = blasted[["dSegment"]],
    "Top J Gene" = blasted[["jSegment"]],
    "CDR1 Seq" = cdr1_aa,
    "CDR1 Length" = nchar(cdr1_aa),
    "CDR2 Seq" = cdr2_aa,
    "CDR2 Length" = nchar(cdr2_aa),
    "CDR3 Seq" = cdr3_aa,
    "CDR3 Length" = nchar(cdr3_aa),
    "CDR3 Seq DNA" = cdr3_nt,
    "CDR3 Length DNA" = nchar(cdr3_nt),
    "Strand" = rep("+/-", n_rows),
    "CDR3 Found How" = found_how,
    "AA JUNCTION" = cdr3_aa,
    check.names = FALSE,
    stringsAsFactors = FALSE
  )

  # Duplicate rows based on the reads count, then renumber the reads.
  if (n_rows > 0) {
    copies <- rep(seq_len(n_rows), blasted[["X.reads_count"]])
    result <- result[copies, , drop = FALSE]
    result[["ID"]] <- seq_len(nrow(result))
    rownames(result) <- NULL
  }

  result
}

args <- commandArgs(trailingOnly = TRUE)

if (length(args) >= 2) {
  infile <- args[1]
  outfile <- args[2]

  blasted <- read.table(
    infile,
    header = TRUE,
    sep = "\t",
    fill = TRUE,
    stringsAsFactors = FALSE,
    comment.char = ""
  )

  blasted <- igblast_to_clonal(blasted)

  write.table(
    blasted,
    outfile,
    quote = FALSE,
    sep = "\t",
    row.names = FALSE,
    col.names = TRUE
  )
} else if (sys.nframe() == 0L) {
  # Run as a top-level script without both paths: fail with a clear message.
  # When the file is source()d without arguments only the functions above
  # are defined.
  stop("usage: Rscript igblast.r <infile> <outfile>", call. = FALSE)
}
