# HG changeset patch
# User davidvanzessen
# Date 1462976973 14400
# Node ID b523ce95d85714d7caab9377553de057ec5caf72
# Parent c5c86d15cb94306f4788387ea44aee87e80e0bd8
Uploaded
diff -r c5c86d15cb94 -r b523ce95d857 merge_and_filter.r
--- a/merge_and_filter.r Mon May 09 03:56:38 2016 -0400
+++ b/merge_and_filter.r Wed May 11 10:29:33 2016 -0400
@@ -63,10 +63,11 @@
unmatched = summ[!higher_than,]
unmatched = unmatched[,c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")]
unmatched$best_match = paste("unmatched,", unmatched$best_match)
+ summ[!higher_than,"best_match"] = paste("unmatched,", summ[!higher_than,"best_match"])
}
if(any(higher_than)){
- summ = summ[higher_than,]
+ #summ = summ[higher_than,]
}
print(paste("Number of matched sequences:", nrow(summ)))
diff -r c5c86d15cb94 -r b523ce95d857 mutation_analysis.py
--- a/mutation_analysis.py Mon May 09 03:56:38 2016 -0400
+++ b/mutation_analysis.py Wed May 11 10:29:33 2016 -0400
@@ -228,7 +228,6 @@
l = int(l / 2)
if len(lst) % 2 == 0:
- print "list length is", l
return float(lst[l] + lst[(l - 1)]) / 2.0
else:
return lst[l]
@@ -246,8 +245,7 @@
with open(directory + "all_" + fname + "_value.txt", 'r') as v:
valuedic["total_" + fname] = float(v.readlines()[0].rstrip())
-print valuedic
-
+
def get_xyz(lst, gene, f, fname):
x = int(round(f(lst)))
y = valuedic[gene + "_" + fname]
@@ -265,7 +263,7 @@
o.write(typ + " (%)")
curr = dic[typ]
for gene in genes:
- geneMatcher = re.compile(".*" + gene + ".*")
+ geneMatcher = re.compile("^" + gene + ".*")
if valuedic[gene + "_" + fname] is 0:
o.write(",0,0,0")
else:
diff -r c5c86d15cb94 -r b523ce95d857 mutation_analysis.r
--- a/mutation_analysis.r Mon May 09 03:56:38 2016 -0400
+++ b/mutation_analysis.r Wed May 11 10:29:33 2016 -0400
@@ -169,7 +169,7 @@
setwd(outputdir)
calculate_result = function(i, gene, dat, matrx, f, fname, name){
- tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),]
+ tmp = dat[grepl(paste("^", gene, ".*", sep=""), dat$best_match),]
j = i - 1
x = (j * 3) + 1
@@ -267,7 +267,9 @@
write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file=paste("matched_", name , "_", fname, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
cat(matrx[1,x], file=paste(name, "_", fname, "_value.txt" ,sep=""))
- cat(length(tmp$Sequence.ID), file=paste(name, "_", fname, "_n.txt" ,sep=""))
+ cat(nrow(tmp), file=paste(name, "_", fname, "_n.txt" ,sep=""))
+
+ print(paste(fname, name, nrow(tmp)))
matrx
}
diff -r c5c86d15cb94 -r b523ce95d857 sequence_overview.r
--- a/sequence_overview.r Mon May 09 03:56:38 2016 -0400
+++ b/sequence_overview.r Wed May 11 10:29:33 2016 -0400
@@ -16,7 +16,7 @@
dat$seq_conc = paste(dat$CDR1.IMGT, dat$CDR2.IMGT, dat$CDR3.IMGT, dat$FR2.IMGT, dat$FR3.IMGT)
-IDs = dat[,c("Sequence.ID", "seq_conc", "best_match")]
+IDs = dat[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")]
IDs$best_match = as.character(IDs$best_match)
#dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")])
diff -r c5c86d15cb94 -r b523ce95d857 wrapper.sh
--- a/wrapper.sh Mon May 09 03:56:38 2016 -0400
+++ b/wrapper.sh Wed May 11 10:29:33 2016 -0400
@@ -86,7 +86,7 @@
echo "---------------- mutation_analysis.r ----------------"
echo "---------------- mutation_analysis.r ----------------
" >> $output
-genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
+genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
echo "R mutation analysis"
Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
@@ -99,7 +99,6 @@
echo "---------------- mutation_analysis.py ----------------
" >> $output
python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
-echo "R AA histogram"
echo "---------------- aa_histogram.r ----------------"
echo "---------------- aa_histogram.r ----------------
" >> $output
@@ -141,15 +140,18 @@
tmp=`cat $outdir/${gene}_${func}_n.txt`
echo "