changeset 52:d3542f87a304 draft

Uploaded
author davidvanzessen
date Fri, 29 Jan 2016 08:11:31 -0500
parents d4e72eeea640
children 7290a88ea202
files merge_and_filter.r mutation_analysis.xml wrapper.sh
diffstat 3 files changed, 27 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/merge_and_filter.r	Fri Jan 29 05:42:17 2016 -0500
+++ b/merge_and_filter.r	Fri Jan 29 08:11:31 2016 -0500
@@ -12,6 +12,7 @@
 method=args[9]
 functionality=args[10]
 unique_type=args[11]
+filter_unique=args[12] == "yes"
 
 summ = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 sequences = read.table(sequencesfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
@@ -89,6 +90,25 @@
 	result$past = 1:nrow(result)
 }
 
+print(paste("filter uniques: ", filter_unique))
+
+if(filter_unique){
+	
+	clmns = names(result)
+	
+	result$unique.def = paste(result$CDR1.Seq, result$CDR2.Seq, result$CDR3.Seq, result$FR1.IMGT, result$FR2.IMGT, result$FR3.IMGT)
+	result.filtered = result[duplicated(result$unique.def),]
+	fltr = result$unique.def %in% result.filtered$unique.def
+	
+	result.removed = result[!fltr,]
+	
+	result = result[fltr,]
+	
+	result = result[,clmns]
+	
+	#write.table(result.removed, "unique_removed.csv", sep=",",quote=F,row.names=F,col.names=T)
+}
+
 
 result = result[!duplicated(result$past), ]
 
--- a/mutation_analysis.xml	Fri Jan 29 05:42:17 2016 -0500
+++ b/mutation_analysis.xml	Fri Jan 29 08:11:31 2016 -0500
@@ -1,7 +1,7 @@
 <tool id="mutation_analysis_shm" name="Mutation Analysis" version="1.0">
 	<description></description>
 	<command interpreter="bash">
-		wrapper.sh $in_file $method $out_file $out_file.files_path ${in_file.name} ${include_fr1} $functionality $unique $naive_output
+		wrapper.sh $in_file $method $out_file $out_file.files_path ${in_file.name} ${include_fr1} $functionality $unique $naive_output $filter_uniques
 	</command>
 	<inputs>
 		<param name="in_file" type="data" label="IMGT zip file to be analysed" />
@@ -33,6 +33,10 @@
 				<option value="no" selected="true">No</option>
 			</param>
 		</conditional>
+		<param name="filter_uniques" type="select" label="Filter unique sequences" help="Filter out the sequences (based on FR1, CDR1, FR2, CDR2, FR3 and CDR3) that only occur once.">
+			<option value="yes">Yes</option>
+			<option value="no" selected="true">No</option>
+		</param>
 	</inputs>
 	<outputs>
 		<data format="html" name="out_file" label = "Mutation analysis on ${in_file.name}"/>
--- a/wrapper.sh	Fri Jan 29 05:42:17 2016 -0500
+++ b/wrapper.sh	Fri Jan 29 08:11:31 2016 -0500
@@ -10,6 +10,7 @@
 functionality=$7
 unique=$8
 naive_output=$9
+filter_unique=${10}
 mkdir $outdir
 
 type="`file $input`"
@@ -57,7 +58,7 @@
 
 
 echo "merging"
-Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique
+Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique}
 
 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
 echo "R mutation analysis"