# HG changeset patch
# User davidvanzessen
# Date 1458906612 14400
# Node ID a7381fd96dad04caae4011801954598f8a787749
# Parent 4262e880472d55ecd3140d0f12703aad3f3eafce
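Uploaded

merge_and_filter.r: build unique.def without FR1.IMGT; append best_match when
filter_unique contains "_c"; match "keep" with grepl and pick the kept or
removed rows in a single if/else.
mutation_analysis.py: wrap the per-region mutation parsing in try/except and
print the split line and linecount on failure.
mutation_analysis.xml: tool definition edited.
wrapper.sh: change the section-header lines echoed into $output.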
diff -r 4262e880472d -r a7381fd96dad merge_and_filter.r
--- a/merge_and_filter.r Fri Mar 25 04:39:18 2016 -0400
+++ b/merge_and_filter.r Fri Mar 25 07:50:12 2016 -0400
@@ -97,15 +97,19 @@
if(filter_unique != "no"){
clmns = names(result)
- result$unique.def = paste(result$CDR1.Seq, result$CDR2.Seq, result$CDR3.Seq, result$FR1.IMGT, result$FR2.IMGT, result$FR3.IMGT)
+ if(grepl("_c", filter_unique)){
+ result$unique.def = paste(result$CDR1.Seq, result$CDR2.Seq, result$CDR3.Seq, result$FR2.IMGT, result$FR3.IMGT, result$best_match)
+ } else {
+ result$unique.def = paste(result$CDR1.Seq, result$CDR2.Seq, result$CDR3.Seq, result$FR2.IMGT, result$FR3.IMGT)
+ }
result.filtered = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),]
fltr = result$Sequence.ID %in% result.filtered$Sequence.ID
#fltr = result$unique.def %in% result.filtered$unique.def
-
- result = result[fltr,]
-
- if(filter_unique == "keep"){
+
+ if(grepl("keep", filter_unique)){
result = result[!fltr,]
+ } else {
+ result = result[fltr,]
}
result = result[,clmns]
diff -r 4262e880472d -r a7381fd96dad mutation_analysis.py
--- a/mutation_analysis.py Fri Mar 25 04:39:18 2016 -0400
+++ b/mutation_analysis.py Fri Mar 25 07:50:12 2016 -0400
@@ -57,13 +57,16 @@
linesplt = line.split("\t")
ID = linesplt[IDIndex]
genedic[ID] = linesplt[best_matchIndex]
- mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else []
- mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x]
- mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x]
- mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x]
- mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
- mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
-
+ try:
+ mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else []
+ mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x]
+ mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x]
+ mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x]
+ mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
+ mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
+ except:
+ print linesplt
+ print linecount
mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
diff -r 4262e880472d -r a7381fd96dad mutation_analysis.xml
--- a/mutation_analysis.xml Fri Mar 25 04:39:18 2016 -0400
+++ b/mutation_analysis.xml Fri Mar 25 07:50:12 2016 -0400
@@ -19,7 +19,13 @@
-
+
+
+
+
+
+
+
@@ -33,11 +39,6 @@
-
-
-
-
-
diff -r 4262e880472d -r a7381fd96dad wrapper.sh
--- a/wrapper.sh Fri Mar 25 04:39:18 2016 -0400
+++ b/wrapper.sh Fri Mar 25 07:50:12 2016 -0400
@@ -14,7 +14,7 @@
mkdir $outdir
echo "---------------- read parameters ----------------"
-echo "---------------- read parameters ----------------" > $output
+echo "---------------- read parameters ----------------
" > $output
echo "unpacking IMGT file"
@@ -43,7 +43,7 @@
echo "${BLASTN_DIR}"
echo "identification ($method)"
-echo "identification ($method)" >> $output
+echo "identification ($method)
" >> $output
echo "blast or custom"
@@ -66,12 +66,12 @@
fi
echo "---------------- merge_and_filter.r ----------------"
-echo "---------------- merge_and_filter.r ----------------" >> $output
+echo "---------------- merge_and_filter.r ----------------
" >> $output
Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique}
echo "---------------- mutation_analysis.r ----------------"
-echo "---------------- mutation_analysis.r ----------------" >> $output
+echo "---------------- mutation_analysis.r ----------------
" >> $output
genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
echo "R mutation analysis"
@@ -83,13 +83,13 @@
echo "---------------- mutation_analysis.py ----------------"
-echo "---------------- mutation_analysis.py ----------------" >> $output
+echo "---------------- mutation_analysis.py ----------------
" >> $output
python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
echo "R AA histogram"
echo "---------------- aa_histogram.r ----------------"
-echo "---------------- aa_histogram.r ----------------" >> $output
+echo "---------------- aa_histogram.r ----------------
" >> $output
Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1