# HG changeset patch # User davidvanzessen # Date 1462976973 14400 # Node ID b523ce95d85714d7caab9377553de057ec5caf72 # Parent c5c86d15cb94306f4788387ea44aee87e80e0bd8 Uploaded diff -r c5c86d15cb94 -r b523ce95d857 merge_and_filter.r --- a/merge_and_filter.r Mon May 09 03:56:38 2016 -0400 +++ b/merge_and_filter.r Wed May 11 10:29:33 2016 -0400 @@ -63,10 +63,11 @@ unmatched = summ[!higher_than,] unmatched = unmatched[,c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")] unmatched$best_match = paste("unmatched,", unmatched$best_match) + summ[!higher_than,"best_match"] = paste("unmatched,", summ[!higher_than,"best_match"]) } if(any(higher_than)){ - summ = summ[higher_than,] + #summ = summ[higher_than,] } print(paste("Number of matched sequences:", nrow(summ))) diff -r c5c86d15cb94 -r b523ce95d857 mutation_analysis.py --- a/mutation_analysis.py Mon May 09 03:56:38 2016 -0400 +++ b/mutation_analysis.py Wed May 11 10:29:33 2016 -0400 @@ -228,7 +228,6 @@ l = int(l / 2) if len(lst) % 2 == 0: - print "list length is", l return float(lst[l] + lst[(l - 1)]) / 2.0 else: return lst[l] @@ -246,8 +245,7 @@ with open(directory + "all_" + fname + "_value.txt", 'r') as v: valuedic["total_" + fname] = float(v.readlines()[0].rstrip()) -print valuedic - + def get_xyz(lst, gene, f, fname): x = int(round(f(lst))) y = valuedic[gene + "_" + fname] @@ -265,7 +263,7 @@ o.write(typ + " (%)") curr = dic[typ] for gene in genes: - geneMatcher = re.compile(".*" + gene + ".*") + geneMatcher = re.compile("^" + gene + ".*") if valuedic[gene + "_" + fname] is 0: o.write(",0,0,0") else: diff -r c5c86d15cb94 -r b523ce95d857 mutation_analysis.r --- a/mutation_analysis.r Mon May 09 03:56:38 2016 -0400 +++ b/mutation_analysis.r Wed May 11 10:29:33 2016 -0400 @@ -169,7 +169,7 @@ setwd(outputdir) calculate_result = function(i, gene, dat, matrx, f, fname, name){ - tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] + tmp = dat[grepl(paste("^", gene, ".*", sep=""), dat$best_match),] j = i - 1 x = (j * 3) + 1 @@ -267,7 +267,9 @@ write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file=paste("matched_", name , "_", fname, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) cat(matrx[1,x], file=paste(name, "_", fname, "_value.txt" ,sep="")) - cat(length(tmp$Sequence.ID), file=paste(name, "_", fname, "_n.txt" ,sep="")) + cat(nrow(tmp), file=paste(name, "_", fname, "_n.txt" ,sep="")) + + print(paste(fname, name, nrow(tmp))) matrx } diff -r c5c86d15cb94 -r b523ce95d857 sequence_overview.r --- a/sequence_overview.r Mon May 09 03:56:38 2016 -0400 +++ b/sequence_overview.r Wed May 11 10:29:33 2016 -0400 @@ -16,7 +16,7 @@ dat$seq_conc = paste(dat$CDR1.IMGT, dat$CDR2.IMGT, dat$CDR3.IMGT, dat$FR2.IMGT, dat$FR3.IMGT) -IDs = dat[,c("Sequence.ID", "seq_conc", "best_match")] +IDs = dat[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] IDs$best_match = as.character(IDs$best_match) #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")]) diff -r c5c86d15cb94 -r b523ce95d857 wrapper.sh --- a/wrapper.sh Mon May 09 03:56:38 2016 -0400 +++ b/wrapper.sh Wed May 11 10:29:33 2016 -0400 @@ -86,7 +86,7 @@ echo "---------------- mutation_analysis.r ----------------" echo "---------------- mutation_analysis.r ----------------
" >> $output -genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm" +genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched" echo "R mutation analysis" Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1 @@ -99,7 +99,6 @@ echo "---------------- mutation_analysis.py ----------------
" >> $output python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt -echo "R AA histogram" echo "---------------- aa_histogram.r ----------------" echo "---------------- aa_histogram.r ----------------
" >> $output @@ -141,15 +140,18 @@ tmp=`cat $outdir/${gene}_${func}_n.txt` echo "${gene} (N = $tmp)" >> $output done + + tmp=`cat $outdir/unmatched_${func}_n.txt` + echo "unmatched (N = $tmp)" >> $output tmp=`cat $outdir/all_${func}_n.txt` - echo "all (N = $tmp)" >> $output + echo "all (N = $tmp)" >> $output - while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz + while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz do if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh echo "$name${cax}/${cay} (${caz})${ca1x}/${ca1y} (${ca1z})${ca2x}/${ca2y} (${ca2z})${cgx}/${cgy} (${cgz})${cg1x}/${cg1y} (${cg1z})${cg2x}/${cg2y} (${cg2z})${cg3x}/${cg3y} (${cg3z})${cg4x}/${cg4y} (${cg4z})${cmx}/${cmy} (${cmz})${allx}/${ally} (${allz})" >> $output else - echo "$name${cax}/${cay} (${caz}%)${ca1x}/${ca1y} (${ca1z}%)${ca2x}/${ca2y} (${ca2z}%)${cgx}/${cgy} (${cgz}%)${cg1x}/${cg1y} (${cg1z}%)${cg2x}/${cg2y} (${cg2z}%)${cg3x}/${cg3y} (${cg3z}%)${cg4x}/${cg4y} (${cg4z}%)${cmx}/${cmy} (${cmz}%)${allx}/${ally} (${allz}%)" >> $output + echo "$name${cax}/${cay} (${caz}%)${ca1x}/${ca1y} (${ca1z}%)${ca2x}/${ca2y} (${ca2z}%)${cgx}/${cgy} (${cgz}%)${cg1x}/${cg1y} (${cg1z}%)${cg2x}/${cg2y} (${cg2z}%)${cg3x}/${cg3y} (${cg3z}%)${cg4x}/${cg4y} (${cg4z}%)${cmx}/${cmy} (${cmz}%)${unx}/${uny} (${unz}%)${allx}/${ally} (${allz}%)" >> $output fi done < $outdir/result.txt