# HG changeset patch # User davidvanzessen # Date 1458906612 14400 # Node ID a7381fd96dad04caae4011801954598f8a787749 # Parent 4262e880472d55ecd3140d0f12703aad3f3eafce Uploaded diff -r 4262e880472d -r a7381fd96dad merge_and_filter.r --- a/merge_and_filter.r Fri Mar 25 04:39:18 2016 -0400 +++ b/merge_and_filter.r Fri Mar 25 07:50:12 2016 -0400 @@ -97,15 +97,19 @@ if(filter_unique != "no"){ clmns = names(result) - result$unique.def = paste(result$CDR1.Seq, result$CDR2.Seq, result$CDR3.Seq, result$FR1.IMGT, result$FR2.IMGT, result$FR3.IMGT) + if(grepl("_c", filter_unique)){ + result$unique.def = paste(result$CDR1.Seq, result$CDR2.Seq, result$CDR3.Seq, result$FR2.IMGT, result$FR3.IMGT, result$best_match) + } else { + result$unique.def = paste(result$CDR1.Seq, result$CDR2.Seq, result$CDR3.Seq, result$FR2.IMGT, result$FR3.IMGT) + } result.filtered = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),] fltr = result$Sequence.ID %in% result.filtered$Sequence.ID #fltr = result$unique.def %in% result.filtered$unique.def - - result = result[fltr,] - - if(filter_unique == "keep"){ + + if(grepl("keep", filter_unique)){ result = result[!fltr,] + } else { + result = result[fltr,] } result = result[,clmns] diff -r 4262e880472d -r a7381fd96dad mutation_analysis.py --- a/mutation_analysis.py Fri Mar 25 04:39:18 2016 -0400 +++ b/mutation_analysis.py Fri Mar 25 07:50:12 2016 -0400 @@ -57,13 +57,16 @@ linesplt = line.split("\t") ID = linesplt[IDIndex] genedic[ID] = linesplt[best_matchIndex] - mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else [] - mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] - mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] - mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] - mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] - mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] - + try: + mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else [] + mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] + mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] + mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] + mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] + except: + print linesplt + print linecount mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] diff -r 4262e880472d -r a7381fd96dad mutation_analysis.xml --- a/mutation_analysis.xml Fri Mar 25 04:39:18 2016 -0400 +++ b/mutation_analysis.xml Fri Mar 25 07:50:12 2016 -0400 @@ -19,7 +19,13 @@ - + + + + + + + @@ -33,11 +39,6 @@ - - - - - diff -r 4262e880472d -r a7381fd96dad wrapper.sh --- a/wrapper.sh Fri Mar 25 04:39:18 2016 -0400 +++ b/wrapper.sh Fri Mar 25 07:50:12 2016 -0400 @@ -14,7 +14,7 @@ mkdir $outdir echo "---------------- read parameters ----------------" -echo "---------------- read parameters ----------------" > $output +echo "---------------- read parameters ----------------
" > $output echo "unpacking IMGT file" @@ -43,7 +43,7 @@ echo "${BLASTN_DIR}" echo "identification ($method)" -echo "identification ($method)" >> $output +echo "identification ($method)
" >> $output echo "blast or custom" @@ -66,12 +66,12 @@ fi echo "---------------- merge_and_filter.r ----------------" -echo "---------------- merge_and_filter.r ----------------" >> $output +echo "---------------- merge_and_filter.r ----------------
" >> $output Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} echo "---------------- mutation_analysis.r ----------------" -echo "---------------- mutation_analysis.r ----------------" >> $output +echo "---------------- mutation_analysis.r ----------------
" >> $output genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm" echo "R mutation analysis" @@ -83,13 +83,13 @@ echo "---------------- mutation_analysis.py ----------------" -echo "---------------- mutation_analysis.py ----------------" >> $output +echo "---------------- mutation_analysis.py ----------------
" >> $output python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt echo "R AA histogram" echo "---------------- aa_histogram.r ----------------" -echo "---------------- aa_histogram.r ----------------" >> $output +echo "---------------- aa_histogram.r ----------------
" >> $output Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1