Mercurial > repos > davidvanzessen > mutation_analysis
changeset 63:a7381fd96dad draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 25 Mar 2016 07:50:12 -0400 |
parents | 4262e880472d |
children | 0fdd90f7c654 |
files | merge_and_filter.r mutation_analysis.py mutation_analysis.xml wrapper.sh |
diffstat | 4 files changed, 32 insertions(+), 24 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/merge_and_filter.r Fri Mar 25 04:39:18 2016 -0400
+++ b/merge_and_filter.r Fri Mar 25 07:50:12 2016 -0400
@@ -97,15 +97,19 @@
 if(filter_unique != "no"){
     clmns = names(result)
-    result$unique.def = paste(result$CDR1.Seq, result$CDR2.Seq, result$CDR3.Seq, result$FR1.IMGT, result$FR2.IMGT, result$FR3.IMGT)
+    if(grepl("_c", filter_unique)){
+        result$unique.def = paste(result$CDR1.Seq, result$CDR2.Seq, result$CDR3.Seq, result$FR2.IMGT, result$FR3.IMGT, result$best_match)
+    } else {
+        result$unique.def = paste(result$CDR1.Seq, result$CDR2.Seq, result$CDR3.Seq, result$FR2.IMGT, result$FR3.IMGT)
+    }
     result.filtered = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),]
     fltr = result$Sequence.ID %in% result.filtered$Sequence.ID
     #fltr = result$unique.def %in% result.filtered$unique.def
-
-    result = result[fltr,]
-
-    if(filter_unique == "keep"){
+
+    if(grepl("keep", filter_unique)){
         result = result[!fltr,]
+    } else {
+        result = result[fltr,]
     }
     result = result[,clmns]
```
```diff
--- a/mutation_analysis.py Fri Mar 25 04:39:18 2016 -0400
+++ b/mutation_analysis.py Fri Mar 25 07:50:12 2016 -0400
@@ -57,13 +57,16 @@
 linesplt = line.split("\t")
 ID = linesplt[IDIndex]
 genedic[ID] = linesplt[best_matchIndex]
-mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else []
-mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x]
-mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x]
-mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x]
-mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
-mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
-
+try:
+    mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else []
+    mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x]
+    mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x]
+    mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x]
+    mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
+    mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
+except:
+    print linesplt
+    print linecount
 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
```
```diff
--- a/mutation_analysis.xml Fri Mar 25 04:39:18 2016 -0400
+++ b/mutation_analysis.xml Fri Mar 25 07:50:12 2016 -0400
@@ -19,7 +19,13 @@
         <option value="remove_unknown">Remove "unknown" and "unknown (see comment)"</option>
         <option value="dont_filter">Don't filter</option>
     </param>
-
+    <param name="filter_uniques" type="select" label="Filter unique sequences">
+        <option value="yes">Remove uniques (CDR1 + FR2 + CDR2 + FR3 + CDR3)</option>
+        <option value="yes_c">Remove uniques (CDR1 + FR2 + CDR2 + FR3 + CDR3 + C)</option>
+        <option value="keep">Keep uniques (CDR1 + FR2 + CDR2 + FR3 + CDR3)</option>
+        <option value="keep_c">Keep uniques (CDR1 + FR2 + CDR2 + FR3 + CDR3 + C)</option>
+        <option value="no" selected="true">No</option>
+    </param>
     <param name="unique" type="select" label="Remove duplicates based on" help="" >
         <option value="AA.JUNCTION_V_subclass" selected="true">AA.JUNCTION + V + subclass</option>
         <option value="AA.JUNCTION_subclass">AA.JUNCTION + subclass</option>
@@ -33,11 +39,6 @@
             <option value="no" selected="true">No</option>
         </param>
     </conditional>
-    <param name="filter_uniques" type="select" label="Filter unique sequences" help="Filter out the sequences (based on CDR1, FR2, CDR2, FR3 and CDR3) that only occur once.">
-        <option value="yes">Yes</option>
-        <option value="no" selected="true">No</option>
-        <option value="keep">Keep uniques and filter out the duplicates</option>
-    </param>
 </inputs>
 <outputs>
     <data format="html" name="out_file" label = "Mutation analysis on ${in_file.name}"/>
```
```diff
--- a/wrapper.sh Fri Mar 25 04:39:18 2016 -0400
+++ b/wrapper.sh Fri Mar 25 07:50:12 2016 -0400
@@ -14,7 +14,7 @@
 mkdir $outdir
 echo "---------------- read parameters ----------------"
-echo "---------------- read parameters ----------------" > $output
+echo "---------------- read parameters ----------------<br />" > $output
 echo "unpacking IMGT file"
@@ -43,7 +43,7 @@
 echo "${BLASTN_DIR}"
 echo "identification ($method)"
-echo "identification ($method)" >> $output
+echo "identification ($method)<br />" >> $output
 echo "blast or custom"
@@ -66,12 +66,12 @@
 fi
 echo "---------------- merge_and_filter.r ----------------"
-echo "---------------- merge_and_filter.r ----------------" >> $output
+echo "---------------- merge_and_filter.r ----------------<br />" >> $output
 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique}
 echo "---------------- mutation_analysis.r ----------------"
-echo "---------------- mutation_analysis.r ----------------" >> $output
+echo "---------------- mutation_analysis.r ----------------<br />" >> $output
 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
 echo "R mutation analysis"
@@ -83,13 +83,13 @@
 echo "---------------- mutation_analysis.py ----------------"
-echo "---------------- mutation_analysis.py ----------------" >> $output
+echo "---------------- mutation_analysis.py ----------------<br />" >> $output
 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
 echo "R AA histogram"
 echo "---------------- aa_histogram.r ----------------"
-echo "---------------- aa_histogram.r ----------------" >> $output
+echo "---------------- aa_histogram.r ----------------<br />" >> $output
 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
```