changeset 66:88e0e7665086 draft

Uploaded
author davidvanzessen
date Wed, 06 Apr 2016 05:54:13 -0400
parents ae8b721a2964
children 67a9ddf6a8f5
files merge_and_filter.r mutation_analysis.xml wrapper.sh
diffstat 3 files changed, 37 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/merge_and_filter.r	Mon Apr 04 04:25:46 2016 -0400
+++ b/merge_and_filter.r	Wed Apr 06 05:54:13 2016 -0400
@@ -13,6 +13,7 @@
 functionality=args[10]
 unique_type=args[11]
 filter_unique=args[12]
+class_filter=args[13]
 
 summ = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 sequences = read.table(sequencesfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
@@ -50,7 +51,11 @@
 
 print(paste("Number of sequences after productive filter:", nrow(summ)))
 
-higher_than=(summ$chunk_hit_percentage >= 70 & summ$nt_hit_percentage >= 70)
+splt = strsplit(class_filter, "_")[[1]]
+chunk_hit_threshold = as.numeric(splt[1])
+nt_hit_threshold = as.numeric(splt[2])
+
+higher_than=(summ$chunk_hit_percentage >= chunk_hit_threshold & summ$nt_hit_percentage >= nt_hit_threshold)
 
 unmatched=summ[NULL,c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")]
 
--- a/mutation_analysis.xml	Mon Apr 04 04:25:46 2016 -0400
+++ b/mutation_analysis.xml	Wed Apr 06 05:54:13 2016 -0400
@@ -1,7 +1,7 @@
 <tool id="mutation_analysis_shm" name="Mutation Analysis" version="1.0">
 	<description></description>
 	<command interpreter="bash">
-		wrapper.sh $in_file $method $out_file $out_file.files_path ${in_file.name} ${include_fr1} $functionality $unique $naive_output $filter_uniques
+		wrapper.sh $in_file $method $out_file $out_file.files_path ${in_file.name} ${include_fr1} $functionality $unique $naive_output $filter_uniques $class_filter
 	</command>
 	<inputs>
 		<param name="in_file" type="data" label="IMGT zip file to be analysed" />
@@ -19,7 +19,7 @@
 			<option value="remove_unknown">Remove "unknown" and "unknown (see comment)"</option>
 			<option value="dont_filter">Don't filter</option>
 		</param>
-		<param name="filter_uniques" type="select" label="Filter unique sequences">
+		<param name="filter_uniques" type="select" label="Filter unique sequences" help="See below for an example.">
 			<option value="yes">Remove uniques (CDR1 + FR2 + CDR2 + FR3 + CDR3)</option>
 			<option value="yes_c">Remove uniques (CDR1 + FR2 + CDR2 + FR3 + CDR3 + C)</option>
 			<option value="keep">Keep uniques (CDR1 + FR2 + CDR2 + FR3 + CDR3)</option>
@@ -33,6 +33,11 @@
 			<option value="AA.JUNCTION">AA.JUNCTION</option>
 			<option value="none">Don't remove duplicates</option>
 		</param>
+		<param name="class_filter" type="select" label="Class/Subclass filter" help="" >
+			<option value="70_70" selected="true">>=70% class and >=70% subclass</option>
+			<option value="70_0">>=70% class</option>
+			<option value="60_0">>=60% class</option>
+		</param>
 		<conditional name="naive_output_cond">
 			<param name="naive_output" type="select" label="Output a file for naive analysis?">
 				<option value="yes">Yes</option>
@@ -47,7 +52,28 @@
 		</data>
 	</outputs>
 	<help>
-		Takes an IMGT zip (http://www.imgt.org/HighV-QUEST/search.action) file and creates a summarization of the mutation analysis.
+		Takes an IMGT zip (http://www.imgt.org/HighV-QUEST/search.action) file and creates a summarization of the mutation analysis.  
+		
+		+--------------------------+
+		|       unique filter      |
+		+--------+--------+--------+
+		| values | remove | keep   |
+		+--------+--------+--------+
+		|   A    |   A    |   A    |
+		+--------+--------+--------+
+		|   A    |   B    |   B    |
+		+--------+--------+--------+
+		|   B    |   D    |   C    |
+		+--------+--------+--------+
+		|   B    |        |   D    |
+		+--------+--------+--------+
+		|   C    |        |        |
+		+--------+--------+--------+
+		|   D    |        |        |
+		+--------+--------+--------+
+		|   D    |        |        |
+		+--------+--------+--------+
+		
 	</help>
 	<requirements>
     <requirement type="package" version="1.0">blastn</requirement>
--- a/wrapper.sh	Mon Apr 04 04:25:46 2016 -0400
+++ b/wrapper.sh	Wed Apr 06 05:54:13 2016 -0400
@@ -11,6 +11,7 @@
 unique=$8
 naive_output=$9
 filter_unique=${10}
+class_filter=${11}
 mkdir $outdir
 
 echo "---------------- read parameters ----------------"
@@ -78,7 +79,7 @@
 echo "---------------- merge_and_filter.r ----------------"
 echo "---------------- merge_and_filter.r ----------------<br />" >> $output
 
-Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique}
+Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter}
 
 echo "---------------- mutation_analysis.r ----------------"
 echo "---------------- mutation_analysis.r ----------------<br />" >> $output