changeset 34:d436daae9d68 draft

Uploaded
author davidvanzessen
date Thu, 18 Jun 2015 03:10:11 -0400
parents ac9a4307861a
children 8dba36531e6e
files merge_and_filter.r mutation_analysis.xml wrapper.sh
diffstat 3 files changed, 43 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/merge_and_filter.r	Thu Apr 16 09:36:19 2015 -0400
+++ b/merge_and_filter.r	Thu Jun 18 03:10:11 2015 -0400
@@ -9,6 +9,8 @@
 output = args[6]
 unmatchedfile = args[7]
 method=args[8]
+functionality=args[9]
+unique_type=args[10]
 
 summ = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F)
 mutationanalysis = read.table(mutationanalysisfile, header=T, sep="\t", fill=T, stringsAsFactors=F)
@@ -28,7 +30,13 @@
 }
 
 summ = merge(summ, gene_identification, by="Sequence.ID")
-summ = summ[summ$Functionality != "No results",]
+
+if(functionality == "no_results"){ # drop only the rows whose Functionality is "No results"
+	summ = summ[summ$Functionality != "No results",]
+} else if (functionality == "no_result_unproductive"){ # drop "No results" rows and "unproductive" rows
+	summ = summ[summ$Functionality != "No results" & summ$Functionality != "unproductive",] # '&': a kept row must differ from both values ('|' is TRUE for every row)
+} # any other value (e.g. "dont_remove") leaves summ unfiltered
+
 higher_than=(summ$chunk_hit_percentage >= 70 & summ$nt_hit_percentage >= 70)
 unmatched = summ[!higher_than,]
 unmatched = unmatched[,c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")]
@@ -61,7 +69,20 @@
 result$JGene = gsub("[*].*", "", result$JGene)
 
 #result$past = paste(result$AA.JUNCTION, result$VGene, result$JGene, (result$FR1.IMGT.Nb.of.mutations + result$CDR1.IMGT.Nb.of.mutations + result$FR2.IMGT.Nb.of.mutations + result$CDR2.IMGT.Nb.of.mutations + result$FR3.IMGT.Nb.of.mutations), result$best_match)
-result$past = paste(result$AA.JUNCTION, result$VGene, result$best_match)
+if(unique_type == "AA.JUNCTION_V_subclass"){ # build the key that duplicated() below uses to drop repeated rows
+	result$past = paste(result$AA.JUNCTION, result$VGene, result$best_match)
+} else if (unique_type == "AA.JUNCTION_subclass"){
+	result$past = paste(result$AA.JUNCTION, result$best_match)
+} else if (unique_type == "V_subclass"){
+	result$past = paste(result$VGene, result$best_match)
+} else if (unique_type == "AA.JUNCTION_V"){
+	result$past = paste(result$AA.JUNCTION, result$VGene)
+} else if (unique_type == "AA.JUNCTION"){
+	result$past = paste(result$AA.JUNCTION)
+} else { # any other value (e.g. "none"): give every row its own key so nothing is removed
+	result$past = seq_len(nrow(result)) # seq_len, not 1:nrow(): 1:0 is c(1, 0), which breaks the assignment when result is empty
+}
+
 
 result = result[!duplicated(result$past), ]
 
--- a/mutation_analysis.xml	Thu Apr 16 09:36:19 2015 -0400
+++ b/mutation_analysis.xml	Thu Jun 18 03:10:11 2015 -0400
@@ -1,7 +1,7 @@
 <tool id="mutation_analysis_shm" name="Mutation Analysis" version="1.0">
 	<description></description>
 	<command interpreter="bash">
-		wrapper.sh $in_file $method $out_file $out_file.files_path ${in_file.name} ${include_fr1}
+		wrapper.sh $in_file $method $out_file $out_file.files_path ${in_file.name} ${include_fr1} $functionality $unique
 	</command>
 	<inputs>
 		<param name="in_file" type="data" label="IMGT zip file to be analysed" />
@@ -13,6 +13,21 @@
 			<option value="yes">yes</option>
 			<option value="no" selected="true">no</option>
 		</param>
+		<param name="functionality" type="select" label="Functionality filter" help="" > <!-- forwarded to wrapper.sh as its 7th argument -->
+			<option value="no_results" selected="true">Remove "No results"</option>
+			<option value="no_result_unproductive">Remove "No results" and "unproductive"</option>
+			<option value="dont_remove">Don't filter</option> <!-- matched by no branch in merge_and_filter.r, so rows are left unfiltered -->
+		</param>
+		
+		<param name="unique" type="select" label="Remove duplicates based on" help="" > <!-- forwarded to wrapper.sh as its 8th argument -->
+			<option value="AA.JUNCTION_V_subclass" selected="true">AA.JUNCTION + V + subclass</option> <!-- option values are compared literally in merge_and_filter.r -->
+			<option value="AA.JUNCTION_subclass">AA.JUNCTION + subclass</option>
+			<option value="V_subclass">V + subclass</option>
+			<option value="AA.JUNCTION_V">AA.JUNCTION + V</option>
+			<option value="AA.JUNCTION">AA.JUNCTION</option>
+			<option value="none">Don't remove duplicates</option> <!-- matched by no named branch; falls through to the else case in merge_and_filter.r -->
+		</param>
+		
 	</inputs>
 	<outputs>
 		<data format="html" name="out_file" label = "Mutation analysis on ${in_file.name}"/>
--- a/wrapper.sh	Thu Apr 16 09:36:19 2015 -0400
+++ b/wrapper.sh	Thu Jun 18 03:10:11 2015 -0400
@@ -7,6 +7,8 @@
 outdir=$4
 title=$5
 include_fr1=$6
+functionality=$7
+unique=$8
 mkdir $outdir
 
 unzip $input -d $PWD/files/ > $PWD/unziplog.log
@@ -40,7 +42,7 @@
 
 
 echo "merging"
-Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method
+Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique
 
 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
 echo "R mutation analysis"
@@ -74,7 +76,7 @@
 	fi
 done < $outdir/result.txt
 echo "</table>" >> $output
-echo "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output
+echo "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br /><a href='merged.txt'>all data</a><br />" >> $output
 
 
 echo "<img src='all.png'/><br />" >> $output