changeset 63:a7381fd96dad draft

Uploaded
author davidvanzessen
date Fri, 25 Mar 2016 07:50:12 -0400
parents 4262e880472d
children 0fdd90f7c654
files merge_and_filter.r mutation_analysis.py mutation_analysis.xml wrapper.sh
diffstat 4 files changed, 32 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/merge_and_filter.r	Fri Mar 25 04:39:18 2016 -0400
+++ b/merge_and_filter.r	Fri Mar 25 07:50:12 2016 -0400
@@ -97,15 +97,19 @@
 if(filter_unique != "no"){
 	clmns = names(result)
 	
-	result$unique.def = paste(result$CDR1.Seq, result$CDR2.Seq, result$CDR3.Seq, result$FR1.IMGT, result$FR2.IMGT, result$FR3.IMGT)
+	if(grepl("_c", filter_unique)){
+		result$unique.def = paste(result$CDR1.Seq, result$CDR2.Seq, result$CDR3.Seq, result$FR2.IMGT, result$FR3.IMGT, result$best_match)
+	} else {
+		result$unique.def = paste(result$CDR1.Seq, result$CDR2.Seq, result$CDR3.Seq, result$FR2.IMGT, result$FR3.IMGT)
+	}
 	result.filtered = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),]
 	fltr = result$Sequence.ID %in% result.filtered$Sequence.ID
 	#fltr = result$unique.def %in% result.filtered$unique.def
-	
-	result = result[fltr,]
-	
-	if(filter_unique == "keep"){
+		
+	if(grepl("keep", filter_unique)){
 		result = result[!fltr,]
+	} else {
+		result = result[fltr,]
 	}
 	
 	result = result[,clmns]
--- a/mutation_analysis.py	Fri Mar 25 04:39:18 2016 -0400
+++ b/mutation_analysis.py	Fri Mar 25 07:50:12 2016 -0400
@@ -57,13 +57,16 @@
 		linesplt = line.split("\t")
 		ID = linesplt[IDIndex]
 		genedic[ID] = linesplt[best_matchIndex]
-		mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else []
-		mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x]
-		mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x]
-		mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x]
-		mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
-		mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
-
+		try:
+			mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else []
+			mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x]
+			mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x]
+			mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x]
+			mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
+			mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
+		except:
+			print linesplt
+			print linecount
 		mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
 		mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
 
--- a/mutation_analysis.xml	Fri Mar 25 04:39:18 2016 -0400
+++ b/mutation_analysis.xml	Fri Mar 25 07:50:12 2016 -0400
@@ -19,7 +19,13 @@
 			<option value="remove_unknown">Remove "unknown" and "unknown (see comment)"</option>
 			<option value="dont_filter">Don't filter</option>
 		</param>
-		
+		<param name="filter_uniques" type="select" label="Filter unique sequences">
+			<option value="yes">Remove uniques (CDR1 + FR2 + CDR2 + FR3 + CDR3)</option>
+			<option value="yes_c">Remove uniques (CDR1 + FR2 + CDR2 + FR3 + CDR3 + C)</option>
+			<option value="keep">Keep uniques (CDR1 + FR2 + CDR2 + FR3 + CDR3)</option>
+			<option value="keep_c">Keep uniques (CDR1 + FR2 + CDR2 + FR3 + CDR3 + C)</option>
+			<option value="no" selected="true">No</option>
+		</param>
 		<param name="unique" type="select" label="Remove duplicates based on" help="" >
 			<option value="AA.JUNCTION_V_subclass" selected="true">AA.JUNCTION + V + subclass</option>
 			<option value="AA.JUNCTION_subclass">AA.JUNCTION + subclass</option>
@@ -33,11 +39,6 @@
 				<option value="no" selected="true">No</option>
 			</param>
 		</conditional>
-		<param name="filter_uniques" type="select" label="Filter unique sequences" help="Filter out the sequences (based on CDR1, FR2, CDR2, FR3 and CDR3) that only occur once.">
-			<option value="yes">Yes</option>
-			<option value="no" selected="true">No</option>
-			<option value="keep">Keep uniques and filter out the duplicates</option>
-		</param>
 	</inputs>
 	<outputs>
 		<data format="html" name="out_file" label = "Mutation analysis on ${in_file.name}"/>
--- a/wrapper.sh	Fri Mar 25 04:39:18 2016 -0400
+++ b/wrapper.sh	Fri Mar 25 07:50:12 2016 -0400
@@ -14,7 +14,7 @@
 mkdir $outdir
 
 echo "---------------- read parameters ----------------"
-echo "---------------- read parameters ----------------" > $output
+echo "---------------- read parameters ----------------<br />" > $output
 
 echo "unpacking IMGT file"
 
@@ -43,7 +43,7 @@
 echo "${BLASTN_DIR}"
 
 echo "identification ($method)"
-echo "identification ($method)" >> $output
+echo "identification ($method)<br />" >> $output
 
 echo "blast or custom"
 
@@ -66,12 +66,12 @@
 fi
 
 echo "---------------- merge_and_filter.r ----------------"
-echo "---------------- merge_and_filter.r ----------------" >> $output
+echo "---------------- merge_and_filter.r ----------------<br />" >> $output
 
 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique}
 
 echo "---------------- mutation_analysis.r ----------------"
-echo "---------------- mutation_analysis.r ----------------" >> $output
+echo "---------------- mutation_analysis.r ----------------<br />" >> $output
 
 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
 echo "R mutation analysis"
@@ -83,13 +83,13 @@
 
 
 echo "---------------- mutation_analysis.py ----------------"
-echo "---------------- mutation_analysis.py ----------------" >> $output
+echo "---------------- mutation_analysis.py ----------------<br />" >> $output
 
 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
 echo "R AA histogram"
 
 echo "---------------- aa_histogram.r ----------------"
-echo "---------------- aa_histogram.r ----------------" >> $output
+echo "---------------- aa_histogram.r ----------------<br />" >> $output
 
 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1