changeset 3:b850200d4335 draft

Uploaded
author davidvanzessen
date Thu, 15 May 2014 09:30:54 -0400
parents fd1b76816395
children 0748ef4d42d3
files RScript.r plotting_merged.xml r_wrapper.sh report_tcell.xml
diffstat 4 files changed, 93 insertions(+), 41 deletions(-) [+]
line wrap: on
line diff
--- a/RScript.r	Thu Mar 27 10:54:03 2014 -0400
+++ b/RScript.r	Thu May 15 09:30:54 2014 -0400
@@ -6,6 +6,9 @@
 outFile = args[2]
 outDir = args[3]
 clonalType = args[4]
+species = args[5]
+locus = args[6]
+selection = args[7]
 
 if (!("gridExtra" %in% rownames(installed.packages()))) {
 	install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") 
@@ -63,7 +66,9 @@
 	PRODF$freq = as.numeric(PRODF$freq)
 } else {
 	PRODF$freq = 1
-	PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ]
+	if(selection == "unique"){
+		PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ]
+	}
 }
 
 PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")])
@@ -87,20 +92,55 @@
 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE)
 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total))
 
-V = c("v.name\tchr.orderV\nTRBV1\t1\nTRBV2\t2\nTRBV3\t3\nTRBV4\t4\nTRBV5\t5\nTRBV12-1\t6\nTRBV13-1\t7\nTRBV12-2\t8\nTRBV13-2\t9\nTRBV13-3\t10\nTRBV14\t11\nTRBV15\t12\nTRBV16\t13\nTRBV17\t14\nTRBV19\t15\nTRBV20\t16\nTRBV23\t17\nTRBV24\t18\nTRBV26\t19\nTRBV29\t20\nTRBV30\t21\nTRBV31\t22\n")
+V = c("v.name\tchr.orderV\n")
+D = c("v.name\tchr.orderD\n")	
+J = c("v.name\tchr.orderJ\n")
+
+if(species == "human"){
+	if(locus == "trb"){		
+		V = c("v.name\tchr.orderV\nTRBV2\t1\nTRBV3-1\t2\nTRBV4-1\t3\nTRBV5-1\t4\nTRBV6-1\t5\nTRBV4-2\t6\nTRBV6-2\t7\nTRBV4-3\t8\nTRBV6-3\t9\nTRBV7-2\t10\nTRBV6-4\t11\nTRBV7-3\t12\nTRBV9\t13\nTRBV10-1\t14\nTRBV11-1\t15\nTRBV10-2\t16\nTRBV11-2\t17\nTRBV6-5\t18\nTRBV7-4\t19\nTRBV5-4\t20\nTRBV6-6\t21\nTRBV5-5\t22\nTRBV7-6\t23\nTRBV5-6\t24\nTRBV6-8\t25\nTRBV7-7\t26\nTRBV6-9\t27\nTRBV7-8\t28\nTRBV5-8\t29\nTRBV7-9\t30\nTRBV13\t31\nTRBV10-3\t32\nTRBV11-3\t33\nTRBV12-3\t34\nTRBV12-4\t35\nTRBV12-5\t36\nTRBV14\t37\nTRBV15\t38\nTRBV16\t39\nTRBV18\t40\nTRBV19\t41\nTRBV20-1\t42\nTRBV24-1\t43\nTRBV25-1\t44\nTRBV27\t45\nTRBV28\t46\nTRBV29-1\t47\nTRBV30\t48")
+		D = c("v.name\tchr.orderD\nTRBD1\t1\nTRBD2\t2\n")	
+		J = c("v.name\tchr.orderJ\nTRBJ1-1\t1\nTRBJ1-2\t2\nTRBJ1-3\t3\nTRBJ1-4\t4\nTRBJ1-5\t5\nTRBJ1-6\t6\nTRBJ2-1\t7\nTRBJ2-2\t8\nTRBJ2-3\t9\nTRBJ2-4\t10\nTRBJ2-5\t11\nTRBJ2-6\t12\nTRBJ2-7\t13")
+	} else if (locus == "tra"){
+		V = c("v.name\tchr.orderV\nTRAV1-1\t1\nTRAV1-2\t2\nTRAV2\t3\nTRAV3\t4\nTRAV4\t5\nTRAV5\t6\nTRAV6\t7\nTRAV7\t8\nTRAV8-1\t9\nTRAV9-1\t10\nTRAV10\t11\nTRAV12-1\t12\nTRAV8-2\t13\nTRAV8-3\t14\nTRAV13-1\t15\nTRAV12-2\t16\nTRAV8-4\t17\nTRAV13-2\t18\nTRAV14/DV4\t19\nTRAV9-2\t20\nTRAV12-3\t21\nTRAV8-6\t22\nTRAV16\t23\nTRAV17\t24\nTRAV18\t25\nTRAV19\t26\nTRAV20\t27\nTRAV21\t28\nTRAV22\t29\nTRAV23/DV6\t30\nTRAV24\t31\nTRAV25\t32\nTRAV26-1\t33\nTRAV27\t34\nTRAV29/DV5\t35\nTRAV30\t36\nTRAV26-2\t37\nTRAV34\t38\nTRAV35\t39\nTRAV36/DV7\t40\nTRAV38-1\t41\nTRAV38-2/DV8\t42\nTRAV39\t43\nTRAV40\t44\nTRAV41\t45\n")
+		D = c("v.name\tchr.orderD\n")	
+		J = c("v.name\tchr.orderJ\nTRAJ57\t1\nTRAJ56\t2\nTRAJ54\t3\nTRAJ53\t4\nTRAJ52\t5\nTRAJ50\t6\nTRAJ49\t7\nTRAJ48\t8\nTRAJ47\t9\nTRAJ46\t10\nTRAJ45\t11\nTRAJ44\t12\nTRAJ43\t13\nTRAJ42\t14\nTRAJ41\t15\nTRAJ40\t16\nTRAJ39\t17\nTRAJ38\t18\nTRAJ37\t19\nTRAJ36\t20\nTRAJ34\t21\nTRAJ33\t22\nTRAJ32\t23\nTRAJ31\t24\nTRAJ30\t25\nTRAJ29\t26\nTRAJ28\t27\nTRAJ27\t28\nTRAJ26\t29\nTRAJ24\t30\nTRAJ23\t31\nTRAJ22\t32\nTRAJ21\t33\nTRAJ20\t34\nTRAJ18\t35\nTRAJ17\t36\nTRAJ16\t37\nTRAJ15\t38\nTRAJ14\t39\nTRAJ13\t40\nTRAJ12\t41\nTRAJ11\t42\nTRAJ10\t43\nTRAJ9\t44\nTRAJ8\t45\nTRAJ7\t46\nTRAJ6\t47\nTRAJ5\t48\nTRAJ4\t49\nTRAJ3\t50")
+	} else if (locus == "trg"){
+		cat("human trg not yet implemented")
+	} else if (locus == "trd"){
+		cat("human trd not yet implemented")
+	}
+} else if (species == "mouse"){
+	if(locus == "trb"){		
+		cat("mouse trb not yet implemented")
+	} else if (locus == "tra"){
+		cat("mouse tra not yet implemented")
+	} else if (locus == "trg"){
+		cat("mouse trg not yet implemented")
+	} else if (locus == "trd"){
+		cat("mouse trd not yet implemented")
+	}
+}
+useD = TRUE
+if(species == "human" && locus == "tra"){
+	useD = FALSE
+	cat("No D Genes in this species/locus")
+}
+
+
 tcV = textConnection(V)
 Vchain = read.table(tcV, sep="\t", header=TRUE)
 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE)
 close(tcV)
 
-D = c("v.name\tchr.orderD\nTRBD1\t1\nTRBD2\t2\n")	
+
 tcD = textConnection(D)
 Dchain = read.table(tcD, sep="\t", header=TRUE)
 PRODFD = merge(PRODFD, Dchain, by.x='Top.D.Gene', by.y='v.name', all.x=TRUE)
 close(tcD)
 
 
-J = c("v.name\tchr.orderJ\nTRBJ1-1\t1\nTRBJ1-2\t2\nTRBJ1-3\t3\nTRBJ1-4\t4\nTRBJ1-5\t5\nTRBJ2-1\t6\nTRBJ2-2\t7\nTRBJ2-3\t8\nTRBJ2-4\t9\nTRBJ2-5\t10\nTRBJ2-6\t11\nTRBJ2-7\t12\n")
+
 tcJ = textConnection(J)
 Jchain = read.table(tcJ, sep="\t", header=TRUE)
 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE)
@@ -201,6 +241,7 @@
 revVchain$chr.orderV = rev(revVchain$chr.orderV)
 revDchain$chr.orderD = rev(revDchain$chr.orderD)
 
+
 plotVD <- function(dat){
 	if(length(dat[,1]) == 0){
 		return()
@@ -236,8 +277,6 @@
 
 lapply(VDList, FUN=plotVD)
 
-
-
 plotVJ <- function(dat){
 	if(length(dat[,1]) == 0){
 		return()
@@ -304,7 +343,6 @@
 DJList = split(completeDJ, f=completeDJ[,"Sample"])
 lapply(DJList, FUN=plotDJ)
 
-
 sampleFile <- file("samples.txt")
 un = unique(test$Sample)
 un = paste(un, sep="\n")
--- a/plotting_merged.xml	Thu Mar 27 10:54:03 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-<tool id="report_clonality_igg" name="Report Clonality" version="1.0">
-	<description> </description>
-	<command interpreter="bash">
-		r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype_select"
-	</command>
-	<inputs>
-	<param name="in_file" format="tabular" type="data" label="Data to Process" />
-	<param name="clonaltype_select" type="select" label="Clonal Type Definition">
-		<option value="Top.V.Gene,CDR3.Seq">Top.V.Gene, CDR3.Seq</option>
-		<option value="Top.V.Gene,CDR3.Seq.DNA">Top.V.Gene, CDR3.Seq.DNA</option>
-		<option value="Top.V.Gene,Top.J.Gene,CDR3.Seq">Top.V.Gene, Top.J.Gene, CDR3.Seq</option>
-		<option value="Top.V.Gene,Top.J.Gene,CDR3.Seq.DNA">Top.V.Gene, Top.J.Gene, CDR3.Seq.DNA</option>
-		<option value="Top.V.Gene,Top.D.Gene,Top.J.Gene,CDR3.Seq.DNA">Top.V.Gene, Top.D.Gene, Top.J.Gene, CDR3.Seq.DNA</option>
-	</param>
-
-	</inputs>
-	<outputs>
-		<data format="html" name="out_file" />
-	</outputs>
-	<help>
-		Step 4 of the Immune Repertoire tools, plots the merged data, generating 3 bar charts for V, D and J frequencies and 3 heatmaps for every sample (V-D, V-J, D-J)
-	</help>
-</tool>
--- a/r_wrapper.sh	Thu Mar 27 10:54:03 2014 -0400
+++ b/r_wrapper.sh	Thu May 15 09:30:54 2014 -0400
@@ -4,9 +4,16 @@
 outputDir=$3
 outputFile=$3/index.html #$2
 clonalType=$4
+species=$5
+locus=$6
+selection=$7
+useD="true"
+if [[ "$species" == "human" && "$locus" == "tra" ]] ; then
+	useD="false"
+fi
 dir="$(cd "$(dirname "$0")" && pwd)"
 mkdir $3
-Rscript --verbose $dir/RScript.r $inputFile $outputDir $outputDir $clonalType 2>&1
+Rscript --verbose $dir/RScript.r $inputFile $outputDir $outputDir $clonalType $species $locus $selection 2>&1
 cp $dir/tabber.js $outputDir
 cp $dir/style.css $outputDir
 cp $dir/script.js $outputDir
@@ -26,10 +33,14 @@
 
 echo "<img src='CDR3LengthPlot.png'/><br />" >> $outputFile
 echo "<img src='VFPlot.png'/>" >> $outputFile
-echo "<img src='DFPlot.png'/>" >> $outputFile
+if [[ "$useD" == "true" ]] ; then
+	echo "<img src='DFPlot.png'/>" >> $outputFile
+fi
 echo "<img src='JFPlot.png'/>" >> $outputFile
 echo "<img src='VPlot.png'/>" >> $outputFile
-echo "<img src='DPlot.png'/>" >> $outputFile
+if [[ "$useD" == "true" ]] ; then
+	echo "<img src='DPlot.png'/>" >> $outputFile
+fi
 echo "<img src='JPlot.png'/></div>" >> $outputFile
 
 samples=`cat $outputDir/samples.txt`
@@ -37,9 +48,14 @@
 echo "<div class='tabbertab' title='Heatmaps'><div class='tabber'>" >> $outputFile
 for sample in $samples; do
 	echo "<div class='tabbertab' title='$sample'><table border='1'><tr>" >> $outputFile
-	echo "<td><img src='HeatmapVD_$sample.png'/></td>" >> $outputFile
+	if [[ "$useD" == "true" ]] ; then
+		echo "<td><img src='HeatmapVD_$sample.png'/></td>" >> $outputFile
+	fi
 	echo "<td><img src='HeatmapVJ_$sample.png'/></td>" >> $outputFile
-	echo "<td><img src='HeatmapDJ_$sample.png'/></td></tr></table></div>" >> $outputFile
+	if [[ "$useD" == "true" ]] ; then
+		echo "<td><img src='HeatmapDJ_$sample.png'/></td>" >> $outputFile
+	fi
+	echo "</tr></table></div>" >> $outputFile
 	count=$((count+1))
 done
 echo "</div></div>" >> $outputFile
@@ -110,17 +126,25 @@
 echo "<tr><td>The dataset used to generate the CDR3 length frequency graph</td><td><a href='CDR3LengthPlot.csv'>Download</a></td></tr>" >> $outputFile
 
 echo "<tr><td>The dataset used to generate the V gene family frequency graph</td><td><a href='VFFrequency.csv'>Download</a></td></tr>" >> $outputFile
-echo "<tr><td>The dataset used to generate the D gene family frequency graph</td><td><a href='DFFrequency.csv'>Download</a></td></tr>" >> $outputFile
+if [[ "$useD" == "true" ]] ; then
+	echo "<tr><td>The dataset used to generate the D gene family frequency graph</td><td><a href='DFFrequency.csv'>Download</a></td></tr>" >> $outputFile
+fi
 echo "<tr><td>The dataset used to generate the J gene family frequency graph</td><td><a href='JFFrequency.csv'>Download</a></td></tr>" >> $outputFile
 
 echo "<tr><td>The dataset used to generate the V gene frequency graph</td><td><a href='VFrequency.csv'>Download</a></td></tr>" >> $outputFile
-echo "<tr><td>The dataset used to generate the D gene frequency graph</td><td><a href='DFrequency.csv'>Download</a></td></tr>" >> $outputFile
+if [[ "$useD" == "true" ]] ; then
+	echo "<tr><td>The dataset used to generate the D gene frequency graph</td><td><a href='DFrequency.csv'>Download</a></td></tr>" >> $outputFile
+fi
 echo "<tr><td>The dataset used to generate the J gene frequency graph</td><td><a href='JFrequency.csv'>Download</a></td></tr>" >> $outputFile
 
 for sample in $samples; do
-	echo "<tr><td>The data used to generate the VD heatmap for $sample.</td><td><a href='HeatmapVD_$sample.csv'>Download</a></td></tr>" >> $outputFile
+	if [[ "$useD" == "true" ]] ; then
+		echo "<tr><td>The data used to generate the VD heatmap for $sample.</td><td><a href='HeatmapVD_$sample.csv'>Download</a></td></tr>" >> $outputFile
+	fi
 	echo "<tr><td>The data used to generate the VJ heatmap for $sample.</td><td><a href='HeatmapVJ_$sample.csv'>Download</a></td></tr>" >> $outputFile
-	echo "<tr><td>The data used to generate the DJ heatmap for $sample.</td><td><a href='HeatmapDJ_$sample.csv'>Download</a></td></tr>" >> $outputFile
+	if [[ "$useD" == "true" ]] ; then
+		echo "<tr><td>The data used to generate the DJ heatmap for $sample.</td><td><a href='HeatmapDJ_$sample.csv'>Download</a></td></tr>" >> $outputFile
+	fi
 done
 
 echo "</table>" >> $outputFile
--- a/report_tcell.xml	Thu Mar 27 10:54:03 2014 -0400
+++ b/report_tcell.xml	Thu May 15 09:30:54 2014 -0400
@@ -1,7 +1,7 @@
 <tool id="report_clonality_t-cells" name="Report Clonality T-Cells" version="1.0">
 	<description> </description>
 	<command interpreter="bash">
-		r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype_select" "$species"
+		r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype_select" $species $locus $selection
 	</command>
 	<inputs>
 	<param name="in_file" format="tabular" type="data" label="Data to Process" />
@@ -12,10 +12,23 @@
 		<option value="Top.V.Gene,Top.J.Gene,CDR3.Seq.DNA">Top.V.Gene, Top.J.Gene, CDR3.Seq.DNA</option>
 		<option value="Top.V.Gene,Top.D.Gene,Top.J.Gene,CDR3.Seq.DNA">Top.V.Gene, Top.D.Gene, Top.J.Gene, CDR3.Seq.DNA</option>
 	</param>
-	<param name="species" type="select" label="Human or Mouse (Does nothing)">
+
+	<param name="species" type="select" label="Species">
 		<option value="human">Human</option>
 		<option value="mouse">Mouse</option>
 	</param>
+
+	<param name="locus" type="select" label="Locus">
+		<option value="trb">TRB</option>
+		<option value="tra">TRA</option>
+		<option value="trg">TRG</option>
+		<option value="trd">TRD</option>
+	</param>
+
+	<param name="selection" type="select" label="Selection">
+		<option value="unique">Unique (Based on clonaltype)</option>
+		<option value="all">All</option>
+	</param>
 	</inputs>
 	<outputs>
 		<data format="html" name="out_file" />