Mercurial > repos > davidvanzessen > mutation_analysis
changeset 100:ff5be711382b draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 17 Jun 2016 05:36:32 -0400 |
parents | 86206431cbb0 |
children | 3cffb8a38bb1 |
files | sequence_overview.r tmp/baseline/Baseline_Functions.r tmp/baseline/filter.r wrapper.sh |
diffstat | 4 files changed, 16 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/sequence_overview.r Thu Jun 16 10:01:54 2016 -0400 +++ b/sequence_overview.r Fri Jun 17 05:36:32 2016 -0400 @@ -2,27 +2,31 @@ args <- commandArgs(trailingOnly = TRUE) -input.file = args[1] -outputdir = args[2] -gene.classes = unlist(strsplit(args[3], ",")) -hotspot.analysis.sum.file = args[4] +before.unique.file = args[1] +merged.file = args[2] +outputdir = args[3] +gene.classes = unlist(strsplit(args[4], ",")) +hotspot.analysis.sum.file = args[5] NToverview.file = paste(outputdir, "ntoverview.txt", sep="/") NTsum.file = paste(outputdir, "ntsum.txt", sep="/") main.html = "index.html" setwd(outputdir) -merged = read.table(input.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") +before.unique = read.table(before.unique.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") +merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="") -merged$seq_conc = paste(merged$CDR1.IMGT.seq, merged$FR2.IMGT.seq, merged$CDR2.IMGT.seq, merged$FR3.IMGT.seq, merged$CDR3.IMGT.seq) +before.unique = before.unique[!grepl("unmatched", before.unique$best_match),] -IDs = merged[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] +before.unique$seq_conc = paste(before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq) + +IDs = before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] IDs$best_match = as.character(IDs$best_match) #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")]) -dat = data.frame(table(merged$seq_conc)) +dat = data.frame(table(before.unique$seq_conc)) #dat = data.frame(table(merged$seq_conc, merged$Functionality)) #dat = dat[dat$Freq > 1,] @@ -138,6 +142,7 @@ #ACGT overview NToverview = merged + NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq, sep="_") NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq))
--- a/tmp/baseline/Baseline_Functions.r Thu Jun 16 10:01:54 2016 -0400 +++ b/tmp/baseline/Baseline_Functions.r Fri Jun 17 05:36:32 2016 -0400 @@ -361,7 +361,7 @@ grepResults = gregexpr("-*",x) grepResultsPos = unlist(grepResults) grepResultsLen = attr(grepResults[[1]],"match.length") - print(paste("x = '", x, "'", sep="")) + #print(paste("x = '", x, "'", sep="")) x = s2c(x) if(x[1]=="-"){ x[1:grepResultsLen[1]] = "N"
--- a/tmp/baseline/filter.r Thu Jun 16 10:01:54 2016 -0400 +++ b/tmp/baseline/filter.r Fri Jun 17 05:36:32 2016 -0400 @@ -9,8 +9,6 @@ summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F) gappeddat = read.table(gappedfile, header=T, sep="\t", fill=T, stringsAsFactors=F) - - #dat = data.frame(merge(gappeddat, summarydat, by="Sequence.ID", all.x=T)) dat = cbind(gappeddat, summarydat$AA.JUNCTION) @@ -26,7 +24,7 @@ dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele) dat$JGene = gsub("[*].*", "", dat$JGene) -print(str(dat)) +#print(str(dat)) dat$past = do.call(paste, c(dat[unlist(strsplit(selection, ","))], sep = ":"))
--- a/wrapper.sh Thu Jun 16 10:01:54 2016 -0400 +++ b/wrapper.sh Fri Jun 17 05:36:32 2016 -0400 @@ -159,7 +159,7 @@ mkdir $outdir/sequence_overview #Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 -Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 +Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 echo "<table border='1'>" > $outdir/base_overview.html