changeset 78:b523ce95d857 draft

Uploaded
author davidvanzessen
date Wed, 11 May 2016 10:29:33 -0400
parents c5c86d15cb94
children 0513b46178c4
files merge_and_filter.r mutation_analysis.py mutation_analysis.r sequence_overview.r wrapper.sh
diffstat 5 files changed, 16 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/merge_and_filter.r	Mon May 09 03:56:38 2016 -0400
+++ b/merge_and_filter.r	Wed May 11 10:29:33 2016 -0400
@@ -63,10 +63,11 @@
 	unmatched = summ[!higher_than,]
 	unmatched = unmatched[,c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")]
 	unmatched$best_match = paste("unmatched,", unmatched$best_match)
+	summ[!higher_than,"best_match"] = paste("unmatched,", summ[!higher_than,"best_match"])
 }
 
 if(any(higher_than)){
-	summ = summ[higher_than,]
+	#summ = summ[higher_than,]
 }
 print(paste("Number of matched sequences:", nrow(summ)))
 
--- a/mutation_analysis.py	Mon May 09 03:56:38 2016 -0400
+++ b/mutation_analysis.py	Wed May 11 10:29:33 2016 -0400
@@ -228,7 +228,6 @@
 	l = int(l / 2)
 	
 	if len(lst) % 2 == 0:
-		print "list length is", l
 		return float(lst[l] + lst[(l - 1)]) / 2.0
 	else:
 		return lst[l]
@@ -246,8 +245,7 @@
 	with open(directory + "all_" + fname + "_value.txt", 'r') as v:
 		valuedic["total_" + fname] = float(v.readlines()[0].rstrip())
 	
-print valuedic
-	
+
 def get_xyz(lst, gene, f, fname):
 	x = int(round(f(lst)))
 	y = valuedic[gene + "_" + fname]
@@ -265,7 +263,7 @@
 			o.write(typ + " (%)")
 			curr = dic[typ]
 			for gene in genes:
-				geneMatcher = re.compile(".*" + gene + ".*")
+				geneMatcher = re.compile("^" + gene + ".*")
 				if valuedic[gene + "_" + fname] is 0:
 					o.write(",0,0,0")
 				else:
--- a/mutation_analysis.r	Mon May 09 03:56:38 2016 -0400
+++ b/mutation_analysis.r	Wed May 11 10:29:33 2016 -0400
@@ -169,7 +169,7 @@
 setwd(outputdir)
 
 calculate_result = function(i, gene, dat, matrx, f, fname, name){
-	tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),]
+	tmp = dat[grepl(paste("^", gene, ".*", sep=""), dat$best_match),]
 
 	j = i - 1
 	x = (j * 3) + 1
@@ -267,7 +267,9 @@
   write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file=paste("matched_", name , "_", fname, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
   
   cat(matrx[1,x], file=paste(name, "_", fname, "_value.txt" ,sep=""))
-  cat(length(tmp$Sequence.ID), file=paste(name, "_", fname, "_n.txt" ,sep=""))
+  cat(nrow(tmp), file=paste(name, "_", fname, "_n.txt" ,sep=""))
+  
+  print(paste(fname, name, nrow(tmp)))
   
   matrx
 }
--- a/sequence_overview.r	Mon May 09 03:56:38 2016 -0400
+++ b/sequence_overview.r	Wed May 11 10:29:33 2016 -0400
@@ -16,7 +16,7 @@
 
 dat$seq_conc = paste(dat$CDR1.IMGT, dat$CDR2.IMGT, dat$CDR3.IMGT, dat$FR2.IMGT, dat$FR3.IMGT)
 
-IDs = dat[,c("Sequence.ID", "seq_conc", "best_match")]
+IDs = dat[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")]
 IDs$best_match = as.character(IDs$best_match)
 
 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")])
--- a/wrapper.sh	Mon May 09 03:56:38 2016 -0400
+++ b/wrapper.sh	Wed May 11 10:29:33 2016 -0400
@@ -86,7 +86,7 @@
 echo "---------------- mutation_analysis.r ----------------"
 echo "---------------- mutation_analysis.r ----------------<br />" >> $output
 
-genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
+genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
 echo "R mutation analysis"
 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
 
@@ -99,7 +99,6 @@
 echo "---------------- mutation_analysis.py ----------------<br />" >> $output
 
 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
-echo "R AA histogram"
 
 echo "---------------- aa_histogram.r ----------------"
 echo "---------------- aa_histogram.r ----------------<br />" >> $output
@@ -141,15 +140,18 @@
 		tmp=`cat $outdir/${gene}_${func}_n.txt`
 		echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output
 	done
+	
+	tmp=`cat $outdir/unmatched_${func}_n.txt`
+	echo "<th><a href='unmatched.txt'>unmatched (N = $tmp)</a></th>" >> $output
 	tmp=`cat $outdir/all_${func}_n.txt`
-	echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output
+	echo "<th><a href='matched_${func}_all.txt'>all (N = $tmp)</a></th>" >> $output
 
-	while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
+	while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz
 	do
 		if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
 			echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
 		else
-			echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
+			echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${unx}/${uny} (${unz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
 		fi
 	done < $outdir/result.txt