changeset 100:ff5be711382b draft

Uploaded
author davidvanzessen
date Fri, 17 Jun 2016 05:36:32 -0400
parents 86206431cbb0
children 3cffb8a38bb1
files sequence_overview.r tmp/baseline/Baseline_Functions.r tmp/baseline/filter.r wrapper.sh
diffstat 4 files changed, 16 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/sequence_overview.r	Thu Jun 16 10:01:54 2016 -0400
+++ b/sequence_overview.r	Fri Jun 17 05:36:32 2016 -0400
@@ -2,27 +2,31 @@
 
 args <- commandArgs(trailingOnly = TRUE)
 
-input.file = args[1]
-outputdir = args[2]
-gene.classes = unlist(strsplit(args[3], ","))
-hotspot.analysis.sum.file = args[4]
+before.unique.file = args[1]
+merged.file = args[2]
+outputdir = args[3]
+gene.classes = unlist(strsplit(args[4], ","))
+hotspot.analysis.sum.file = args[5]
 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/")
 NTsum.file = paste(outputdir, "ntsum.txt", sep="/")
 main.html = "index.html"
 
 setwd(outputdir)
 
-merged = read.table(input.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
+before.unique = read.table(before.unique.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
+merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="")
 
-merged$seq_conc = paste(merged$CDR1.IMGT.seq, merged$FR2.IMGT.seq, merged$CDR2.IMGT.seq, merged$FR3.IMGT.seq, merged$CDR3.IMGT.seq)
+before.unique = before.unique[!grepl("unmatched", before.unique$best_match),]
 
-IDs = merged[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")]
+before.unique$seq_conc = paste(before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq)
+
+IDs = before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")]
 IDs$best_match = as.character(IDs$best_match)
 
 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")])
 
-dat = data.frame(table(merged$seq_conc))
+dat = data.frame(table(before.unique$seq_conc))
 #dat = data.frame(table(merged$seq_conc, merged$Functionality))
 
 #dat = dat[dat$Freq > 1,]
@@ -138,6 +142,7 @@
 #ACGT overview
 
 NToverview = merged
+
 NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq, sep="_")
 
 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq))
--- a/tmp/baseline/Baseline_Functions.r	Thu Jun 16 10:01:54 2016 -0400
+++ b/tmp/baseline/Baseline_Functions.r	Fri Jun 17 05:36:32 2016 -0400
@@ -361,7 +361,7 @@
     grepResults = gregexpr("-*",x)
     grepResultsPos = unlist(grepResults)
     grepResultsLen =  attr(grepResults[[1]],"match.length")   
-    print(paste("x = '", x, "'", sep=""))
+    #print(paste("x = '", x, "'", sep=""))
     x = s2c(x)
     if(x[1]=="-"){
       x[1:grepResultsLen[1]] = "N"      
--- a/tmp/baseline/filter.r	Thu Jun 16 10:01:54 2016 -0400
+++ b/tmp/baseline/filter.r	Fri Jun 17 05:36:32 2016 -0400
@@ -9,8 +9,6 @@
 summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F)
 gappeddat = read.table(gappedfile, header=T, sep="\t", fill=T, stringsAsFactors=F)
 
-
-
 #dat = data.frame(merge(gappeddat, summarydat, by="Sequence.ID", all.x=T))
 
 dat = cbind(gappeddat, summarydat$AA.JUNCTION)
@@ -26,7 +24,7 @@
 dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele)
 dat$JGene = gsub("[*].*", "", dat$JGene)
 
-print(str(dat))
+#print(str(dat))
 
 dat$past = do.call(paste, c(dat[unlist(strsplit(selection, ","))], sep = ":"))
 
--- a/wrapper.sh	Thu Jun 16 10:01:54 2016 -0400
+++ b/wrapper.sh	Fri Jun 17 05:36:32 2016 -0400
@@ -159,7 +159,7 @@
 mkdir $outdir/sequence_overview
 
 #Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1
-Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1
+Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1
 
 echo "<table border='1'>" > $outdir/base_overview.html