Repository 'report_clonality_igg'
hg clone https://eddie.galaxyproject.org/repos/davidvanzessen/report_clonality_igg

Changeset 27:f919965e8816 (2015-03-06)
Previous changeset 26:01f05258f672 (2015-02-09) Next changeset 28:4f85b8ed0b2c (2015-03-12)
Commit message:
Uploaded
modified:
RScript.r
r_wrapper.sh
report_clonality_igg.xml
b
diff -r 01f05258f672 -r f919965e8816 RScript.r
--- a/RScript.r Mon Feb 09 08:53:30 2015 -0500
+++ b/RScript.r Fri Mar 06 09:59:53 2015 -0500
[
b'@@ -23,6 +23,11 @@\n }\n library(reshape2)\n \n+if (!("lymphclon" %in% rownames(installed.packages()))) {\n+  install.packages("lymphclon", repos="http://cran.xl-mirror.nl/") \n+}\n+library(lymphclon)\n+\n # ---------------------- parameters ----------------------\n \n args <- commandArgs(trailingOnly = TRUE)\n@@ -35,6 +40,7 @@\n species = args[5] #human or mouse\n locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD\n filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no)\n+clonality_method = args[8]\n \n # ---------------------- Data preperation ----------------------\n \n@@ -217,7 +223,6 @@\n   useD = FALSE\n   cat("No D Genes in this species/locus")\n }\n-\n print(paste("useD:", useD))\n \n # ---------------------- merge with the frequency count ----------------------\n@@ -453,84 +458,96 @@\n \n if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available\n {\n-  write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T)\n-  \n-  ClonalitySampleReplicatePrint <- function(dat){\n-    write.table(dat, paste("clonality_", unique(inputdata$Sample) , "_", unique(dat$Replicate), ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T)\n-  }\n-  \n-  clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,c("Sample", "Replicate")])\n-  #lapply(clonalityFrameSplit, FUN=ClonalitySampleReplicatePrint)\n-  \n-  ClonalitySamplePrint <- function(dat){\n-    write.table(dat, paste("clonality_", unique(inputdata$Sample) , ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T)\n-  }\n-  \n-  clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,"Sample"])\n-  #lapply(clonalityFrameSplit, FUN=ClonalitySamplePrint)\n-  \n-  clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")])\n-  clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")])\n-  clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count\n-  clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")])\n-  clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample")\n-  \n-  ct = c(\'Type\\tWeight\\n2\\t1\\n3\\t3\\n4\\t6\\n5\\t10\\n6\\t15\')\n-  tcct = textConnection(ct)\n-  CT  = read.table(tcct, sep="\\t", header=TRUE)\n-  close(tcct)\n-  clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T)\n-  clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight\n-  \n-  ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonality_clonaltype")])\n-  ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")])\n-  clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads)\n-  ReplicateReads$squared = ReplicateReads$Reads * ReplicateReads$Reads\n-  \n-  ReplicatePrint <- function(dat){\n-    write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)\n+  if(clonality_method == "boyd"){\n+    samples = split(clonalityFrame, clonalityFrame$Sample, drop=T)\n+   \n+    for (sample in samples){\n+      res = data.frame(paste=character(0))\n+      sample_id = unique(sample$Sample)[[1]]\n+      for(replicate in unique(sample$Replicate)){\n+        tmp = sample[sample$Replicate == replicate,]\n+        clone_table = data.frame(table(tmp$clonaltype))\n+        clone_col_name = paste("V", replicate, sep="")\n+        colnames(clone_table) = c("paste", clone_col_name)\n+        res = merge(res, clone_table, by="paste", all=T)\n+      }\n+      \n+      res[is.na(res)] = 0      \n+      infer.result = infer.clonality(as.matrix(res[,2:ncol(res)]))\n+      \n+      write.table(data.table(infer.result[[12]]), file=paste("lymphclon_clonality_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=F)\n+      \n+      res$type = rowSums(res[,2:ncol(res)])\n+      \n+      coinc'..b'alFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")])\n+    clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T)\n+    clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow\n+    clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2)\n+    clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1)\n+    \n+    ClonalityScorePrint <- function(dat){\n+      write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)\n+    }\n+    \n+    clonalityScore = clonalFreqCount[c("Sample", "Result")]\n+    clonalityScore = unique(clonalityScore)\n+    \n+    clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"])\n+    lapply(clonalityScoreSplit, FUN=ClonalityScorePrint)\n+    \n+    clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")]\n+    \n+    \n+    \n+    ClonalityOverviewPrint <- function(dat){\n+      write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)\n+    }\n+    \n+    clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample)\n+    lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint)\n   }\n-  \n-  ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"])\n-  lapply(ReplicateSplit, FUN=ReplicatePrint)\n-  \n-  ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")])\n-  clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T)\n-  \n-  \n-  ReplicateSumPrint <- function(dat){\n-    write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)\n-  }\n-  \n-  ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"])\n-  lapply(ReplicateSumSplit, FUN=ReplicateSumPrint)\n-  \n-  clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")])\n-  clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T)\n-  clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow\n-  clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2)\n-  clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1)\n-  \n-  ClonalityScorePrint <- function(dat){\n-    write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)\n-  }\n-  \n-  clonalityScore = clonalFreqCount[c("Sample", "Result")]\n-  clonalityScore = unique(clonalityScore)\n-  \n-  clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"])\n-  lapply(clonalityScoreSplit, FUN=ClonalityScorePrint)\n-  \n-  clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")]\n-  \n-  \n-  \n-  ClonalityOverviewPrint <- function(dat){\n-    write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)\n-  }\n-  \n-  clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample)\n-  lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint)\n }\n \n imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb")\n'
b
diff -r 01f05258f672 -r f919965e8816 r_wrapper.sh
--- a/r_wrapper.sh Mon Feb 09 08:53:30 2015 -0500
+++ b/r_wrapper.sh Fri Mar 06 09:59:53 2015 -0500
[
@@ -7,6 +7,7 @@
 species=$5
 locus=$6
 filterproductive=$7
+clonality_method=$8
 dir="$(cd "$(dirname "$0")" && pwd)"
 useD="false"
 if grep -q "$species.*${locus}D" "$dir/genes.txt" ; then
@@ -22,9 +23,10 @@
  useD="false"
  fi
 fi
+useD="false"
 mkdir $3
 cp $dir/genes.txt $outputDir
-Rscript --verbose $dir/RScript.r $inputFile $outputDir $outputDir $clonalType "$species" "$locus" $filterproductive 2>&1
+Rscript --verbose $dir/RScript.r $inputFile $outputDir $outputDir $clonalType "$species" "$locus" $filterproductive ${clonality_method} 2>&1
 cp $dir/tabber.js $outputDir
 cp $dir/style.css $outputDir
 cp $dir/script.js $outputDir
@@ -89,34 +91,49 @@
 hasReplicateColumn="$(if head -n 1 $inputFile | grep -q 'Replicate'; then echo 'Yes'; else echo 'No'; fi)"
 echo "$hasReplicateColumn"
 #if its a 'new' merged file with replicate info
-if [[ "$hasReplicateColumn" == "Yes" ]] ; then
+if [[ "$hasReplicateColumn" == "Yes" && "$clonalType" != "none" ]] ; then
  echo "<div class='tabbertab' title='Clonality'><div class='tabber'>" >> $outputFile
  for sample in $samples; do
- clonalityScore="$(cat $outputDir/ClonalityScore_$sample.csv)"
- echo "<div class='tabbertab' title='$sample'><table border='1'>" >> $outputFile
- echo "<tr><td colspan='4'>Clonality Score: $clonalityScore</td></tr>" >> $outputFile
+ echo "${clonality_method}"
+ if [[ "${clonality_method}" == "old" ]] ; then
+ echo "in old"
+ clonalityScore="$(cat $outputDir/ClonalityScore_$sample.csv)"
+ echo "<div class='tabbertab' title='$sample'><table border='1'>" >> $outputFile
+ echo "<tr><td colspan='4'>Clonality Score: $clonalityScore</td></tr>" >> $outputFile
 
- #replicate,reads,squared
- echo "<tr><td>Replicate ID</td><td>Number of Reads</td><td>Reads Squared</td><td></td></tr>" >> $outputFile
- while IFS=, read replicate reads squared
- do
+ #replicate,reads,squared
+ echo "<tr><td>Replicate ID</td><td>Number of Reads</td><td>Reads Squared</td><td></td></tr>" >> $outputFile
+ while IFS=, read replicate reads squared
+ do
+
+ echo "<tr><td>$replicate</td><td>$reads</td><td>$squared</td><td></td></tr>" >> $outputFile
+ done < $outputDir/ReplicateReads_$sample.csv
 
- echo "<tr><td>$replicate</td><td>$reads</td><td>$squared</td><td></td></tr>" >> $outputFile
- done < $outputDir/ReplicateReads_$sample.csv
-
- #sum of reads and reads squared
- while IFS=, read readsSum squaredSum
+ #sum of reads and reads squared
+ while IFS=, read readsSum squaredSum
+ do
+ echo "<tr><td>Sum</td><td>$readsSum</td><td>$squaredSum</td></tr>" >> $outputFile
+ done < $outputDir/ReplicateSumReads_$sample.csv
+
+ #overview
+ echo "<tr><td>Coincidence Type</td><td>Raw Coincidence Freq</td><td>Coincidence Weight</td><td>Coincidences, Weighted</td></tr>" >> $outputFile
+ while IFS=, read type count weight weightedCount
  do
- echo "<tr><td>Sum</td><td>$readsSum</td><td>$squaredSum</td></tr>" >> $outputFile
- done < $outputDir/ReplicateSumReads_$sample.csv
-
- #overview
- echo "<tr><td>Coincidence Type</td><td>Raw Coincidence Freq</td><td>Coincidence Weight</td><td>Coincidences, Weighted</td></tr>" >> $outputFile
- while IFS=, read type count weight weightedCount
- do
- echo "<tr><td>$type</td><td>$count</td><td>$weight</td><td>$weightedCount</td></tr>" >> $outputFile
- done < $outputDir/ClonalityOverView_$sample.csv
- echo "</table></div>" >> $outputFile
+ echo "<tr><td>$type</td><td>$count</td><td>$weight</td><td>$weightedCount</td></tr>" >> $outputFile
+ done < $outputDir/ClonalityOverView_$sample.csv
+ echo "</table></div>" >> $outputFile
+ else
+ echo "in new"
+ clonalityScore="$(cat $outputDir/lymphclon_clonality_${sample}.csv)"
+ echo "<div class='tabbertab' title='$sample'>" >> $outputFile
+ echo "Lymphclon clonality score: <br />$clonalityScore<br /><br />" >> $outputFile
+ echo "<table border = 1>" >> $outputFile
+ while IFS=, read type count
+ do
+ echo "<tr><td>$type</td><td>$count</td></tr>" >> $outputFile
+ done < $outputDir/lymphclon_coincidences_$sample.csv
+ echo "</table></div>" >> $outputFile
+ fi
  done
  echo "</div></div>" >> $outputFile
 fi
b
diff -r 01f05258f672 -r f919965e8816 report_clonality_igg.xml
--- a/report_clonality_igg.xml Mon Feb 09 08:53:30 2015 -0500
+++ b/report_clonality_igg.xml Fri Mar 06 09:59:53 2015 -0500
b
@@ -2,14 +2,14 @@
  <description> </description>
  <command interpreter="bash">
 #if $gene_selection.source == "imgtdb"
- r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "${gene_selection.species}" "${gene_selection.locus}" $filterproductive
+ r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "${gene_selection.species}" "${gene_selection.locus}" $filterproductive $clonality_method
 #else
- r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "custom" "${gene_selection.vgenes};${gene_selection.dgenes};${gene_selection.jgenes}" $filterproductive
+ r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "custom" "${gene_selection.vgenes};${gene_selection.dgenes};${gene_selection.jgenes}" $filterproductive $clonality_method
 #end if
  </command>
  <inputs>
  <param name="in_file" format="tabular" type="data" label="Data to Process" />
- <param name="clonaltype" type="select" label="Clonal Type Definition">
+ <param name="clonaltype" type="select" label="Clonal Type Definition (Needed for clonality calculation)">
  <option value="none">Dont remove duplicates based on clonaltype</option>
  <option value="Top.V.Gene,CDR3.Seq">Top.V.Gene, CDR3.Seq</option>
  <option value="Top.V.Gene,CDR3.Seq.DNA">Top.V.Gene, CDR3.Seq.DNA</option>
@@ -89,6 +89,12 @@
  <option value="yes">Yes</option>
  <option value="no">No</option>
  </param>
+
+ <param name="clonality_method" type="select" label="Old clonality algorithm or the newer R package">
+ <option value="old">Old</option>
+ <option value="boyd">R Package</option>
+ </param>
+
  </inputs>
  <outputs>
  <data format="html" name="out_file" />