| Previous changeset 26:01f05258f672 (2015-02-09) Next changeset 28:4f85b8ed0b2c (2015-03-12) |
|
Commit message:
Uploaded |
|
modified:
RScript.r r_wrapper.sh report_clonality_igg.xml |
| b |
| diff -r 01f05258f672 -r f919965e8816 RScript.r --- a/RScript.r Mon Feb 09 08:53:30 2015 -0500 +++ b/RScript.r Fri Mar 06 09:59:53 2015 -0500 |
| [ |
| b'@@ -23,6 +23,11 @@\n }\n library(reshape2)\n \n+if (!("lymphclon" %in% rownames(installed.packages()))) {\n+ install.packages("lymphclon", repos="http://cran.xl-mirror.nl/") \n+}\n+library(lymphclon)\n+\n # ---------------------- parameters ----------------------\n \n args <- commandArgs(trailingOnly = TRUE)\n@@ -35,6 +40,7 @@\n species = args[5] #human or mouse\n locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD\n filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no)\n+clonality_method = args[8]\n \n # ---------------------- Data preperation ----------------------\n \n@@ -217,7 +223,6 @@\n useD = FALSE\n cat("No D Genes in this species/locus")\n }\n-\n print(paste("useD:", useD))\n \n # ---------------------- merge with the frequency count ----------------------\n@@ -453,84 +458,96 @@\n \n if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available\n {\n- write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T)\n- \n- ClonalitySampleReplicatePrint <- function(dat){\n- write.table(dat, paste("clonality_", unique(inputdata$Sample) , "_", unique(dat$Replicate), ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T)\n- }\n- \n- clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,c("Sample", "Replicate")])\n- #lapply(clonalityFrameSplit, FUN=ClonalitySampleReplicatePrint)\n- \n- ClonalitySamplePrint <- function(dat){\n- write.table(dat, paste("clonality_", unique(inputdata$Sample) , ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T)\n- }\n- \n- clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,"Sample"])\n- #lapply(clonalityFrameSplit, FUN=ClonalitySamplePrint)\n- \n- clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")])\n- clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")])\n- clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count\n- clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")])\n- clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample")\n- \n- ct = c(\'Type\\tWeight\\n2\\t1\\n3\\t3\\n4\\t6\\n5\\t10\\n6\\t15\')\n- tcct = textConnection(ct)\n- CT = read.table(tcct, sep="\\t", header=TRUE)\n- close(tcct)\n- clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T)\n- clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight\n- \n- ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonality_clonaltype")])\n- ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")])\n- clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads)\n- ReplicateReads$squared = ReplicateReads$Reads * ReplicateReads$Reads\n- \n- ReplicatePrint <- function(dat){\n- write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)\n+ if(clonality_method == "boyd"){\n+ samples = split(clonalityFrame, clonalityFrame$Sample, drop=T)\n+ \n+ for (sample in samples){\n+ res = data.frame(paste=character(0))\n+ sample_id = unique(sample$Sample)[[1]]\n+ for(replicate in unique(sample$Replicate)){\n+ tmp = sample[sample$Replicate == replicate,]\n+ clone_table = data.frame(table(tmp$clonaltype))\n+ clone_col_name = paste("V", replicate, sep="")\n+ colnames(clone_table) = c("paste", clone_col_name)\n+ res = merge(res, clone_table, by="paste", all=T)\n+ }\n+ \n+ res[is.na(res)] = 0 \n+ infer.result = infer.clonality(as.matrix(res[,2:ncol(res)]))\n+ \n+ write.table(data.table(infer.result[[12]]), file=paste("lymphclon_clonality_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=F)\n+ \n+ res$type = rowSums(res[,2:ncol(res)])\n+ \n+ coinc'..b'alFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")])\n+ clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T)\n+ clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow\n+ clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2)\n+ clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1)\n+ \n+ ClonalityScorePrint <- function(dat){\n+ write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)\n+ }\n+ \n+ clonalityScore = clonalFreqCount[c("Sample", "Result")]\n+ clonalityScore = unique(clonalityScore)\n+ \n+ clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"])\n+ lapply(clonalityScoreSplit, FUN=ClonalityScorePrint)\n+ \n+ clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")]\n+ \n+ \n+ \n+ ClonalityOverviewPrint <- function(dat){\n+ write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)\n+ }\n+ \n+ clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample)\n+ lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint)\n }\n- \n- ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"])\n- lapply(ReplicateSplit, FUN=ReplicatePrint)\n- \n- ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")])\n- clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T)\n- \n- \n- ReplicateSumPrint <- function(dat){\n- write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)\n- }\n- \n- ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"])\n- lapply(ReplicateSumSplit, FUN=ReplicateSumPrint)\n- \n- clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")])\n- clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T)\n- clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow\n- clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2)\n- clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1)\n- \n- ClonalityScorePrint <- function(dat){\n- write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)\n- }\n- \n- clonalityScore = clonalFreqCount[c("Sample", "Result")]\n- clonalityScore = unique(clonalityScore)\n- \n- clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"])\n- lapply(clonalityScoreSplit, FUN=ClonalityScorePrint)\n- \n- clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")]\n- \n- \n- \n- ClonalityOverviewPrint <- function(dat){\n- write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)\n- }\n- \n- clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample)\n- lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint)\n }\n \n imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb")\n' |
| b |
| diff -r 01f05258f672 -r f919965e8816 r_wrapper.sh --- a/r_wrapper.sh Mon Feb 09 08:53:30 2015 -0500 +++ b/r_wrapper.sh Fri Mar 06 09:59:53 2015 -0500 |
| [ |
| @@ -7,6 +7,7 @@ species=$5 locus=$6 filterproductive=$7 +clonality_method=$8 dir="$(cd "$(dirname "$0")" && pwd)" useD="false" if grep -q "$species.*${locus}D" "$dir/genes.txt" ; then @@ -22,9 +23,10 @@ useD="false" fi fi +useD="false" mkdir $3 cp $dir/genes.txt $outputDir -Rscript --verbose $dir/RScript.r $inputFile $outputDir $outputDir $clonalType "$species" "$locus" $filterproductive 2>&1 +Rscript --verbose $dir/RScript.r $inputFile $outputDir $outputDir $clonalType "$species" "$locus" $filterproductive ${clonality_method} 2>&1 cp $dir/tabber.js $outputDir cp $dir/style.css $outputDir cp $dir/script.js $outputDir @@ -89,34 +91,49 @@ hasReplicateColumn="$(if head -n 1 $inputFile | grep -q 'Replicate'; then echo 'Yes'; else echo 'No'; fi)" echo "$hasReplicateColumn" #if its a 'new' merged file with replicate info -if [[ "$hasReplicateColumn" == "Yes" ]] ; then +if [[ "$hasReplicateColumn" == "Yes" && "$clonalType" != "none" ]] ; then echo "<div class='tabbertab' title='Clonality'><div class='tabber'>" >> $outputFile for sample in $samples; do - clonalityScore="$(cat $outputDir/ClonalityScore_$sample.csv)" - echo "<div class='tabbertab' title='$sample'><table border='1'>" >> $outputFile - echo "<tr><td colspan='4'>Clonality Score: $clonalityScore</td></tr>" >> $outputFile + echo "${clonality_method}" + if [[ "${clonality_method}" == "old" ]] ; then + echo "in old" + clonalityScore="$(cat $outputDir/ClonalityScore_$sample.csv)" + echo "<div class='tabbertab' title='$sample'><table border='1'>" >> $outputFile + echo "<tr><td colspan='4'>Clonality Score: $clonalityScore</td></tr>" >> $outputFile - #replicate,reads,squared - echo "<tr><td>Replicate ID</td><td>Number of Reads</td><td>Reads Squared</td><td></td></tr>" >> $outputFile - while IFS=, read replicate reads squared - do + #replicate,reads,squared + echo "<tr><td>Replicate ID</td><td>Number of Reads</td><td>Reads Squared</td><td></td></tr>" >> $outputFile + while IFS=, read replicate reads squared + do + + echo "<tr><td>$replicate</td><td>$reads</td><td>$squared</td><td></td></tr>" >> $outputFile + done < $outputDir/ReplicateReads_$sample.csv - echo "<tr><td>$replicate</td><td>$reads</td><td>$squared</td><td></td></tr>" >> $outputFile - done < $outputDir/ReplicateReads_$sample.csv - - #sum of reads and reads squared - while IFS=, read readsSum squaredSum + #sum of reads and reads squared + while IFS=, read readsSum squaredSum + do + echo "<tr><td>Sum</td><td>$readsSum</td><td>$squaredSum</td></tr>" >> $outputFile + done < $outputDir/ReplicateSumReads_$sample.csv + + #overview + echo "<tr><td>Coincidence Type</td><td>Raw Coincidence Freq</td><td>Coincidence Weight</td><td>Coincidences, Weighted</td></tr>" >> $outputFile + while IFS=, read type count weight weightedCount do - echo "<tr><td>Sum</td><td>$readsSum</td><td>$squaredSum</td></tr>" >> $outputFile - done < $outputDir/ReplicateSumReads_$sample.csv - - #overview - echo "<tr><td>Coincidence Type</td><td>Raw Coincidence Freq</td><td>Coincidence Weight</td><td>Coincidences, Weighted</td></tr>" >> $outputFile - while IFS=, read type count weight weightedCount - do - echo "<tr><td>$type</td><td>$count</td><td>$weight</td><td>$weightedCount</td></tr>" >> $outputFile - done < $outputDir/ClonalityOverView_$sample.csv - echo "</table></div>" >> $outputFile + echo "<tr><td>$type</td><td>$count</td><td>$weight</td><td>$weightedCount</td></tr>" >> $outputFile + done < $outputDir/ClonalityOverView_$sample.csv + echo "</table></div>" >> $outputFile + else + echo "in new" + clonalityScore="$(cat $outputDir/lymphclon_clonality_${sample}.csv)" + echo "<div class='tabbertab' title='$sample'>" >> $outputFile + echo "Lymphclon clonality score: <br />$clonalityScore<br /><br />" >> $outputFile + echo "<table border = 1>" >> $outputFile + while IFS=, read type count + do + echo "<tr><td>$type</td><td>$count</td></tr>" >> $outputFile + done < $outputDir/lymphclon_coincidences_$sample.csv + echo "</table></div>" >> $outputFile + fi done echo "</div></div>" >> $outputFile fi |
| b |
| diff -r 01f05258f672 -r f919965e8816 report_clonality_igg.xml --- a/report_clonality_igg.xml Mon Feb 09 08:53:30 2015 -0500 +++ b/report_clonality_igg.xml Fri Mar 06 09:59:53 2015 -0500 |
| b |
| @@ -2,14 +2,14 @@ <description> </description> <command interpreter="bash"> #if $gene_selection.source == "imgtdb" - r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "${gene_selection.species}" "${gene_selection.locus}" $filterproductive + r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "${gene_selection.species}" "${gene_selection.locus}" $filterproductive $clonality_method #else - r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "custom" "${gene_selection.vgenes};${gene_selection.dgenes};${gene_selection.jgenes}" $filterproductive + r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "custom" "${gene_selection.vgenes};${gene_selection.dgenes};${gene_selection.jgenes}" $filterproductive $clonality_method #end if </command> <inputs> <param name="in_file" format="tabular" type="data" label="Data to Process" /> - <param name="clonaltype" type="select" label="Clonal Type Definition"> + <param name="clonaltype" type="select" label="Clonal Type Definition (Needed for clonality calculation)"> <option value="none">Dont remove duplicates based on clonaltype</option> <option value="Top.V.Gene,CDR3.Seq">Top.V.Gene, CDR3.Seq</option> <option value="Top.V.Gene,CDR3.Seq.DNA">Top.V.Gene, CDR3.Seq.DNA</option> @@ -89,6 +89,12 @@ <option value="yes">Yes</option> <option value="no">No</option> </param> + + <param name="clonality_method" type="select" label="Old clonality algorithm or the newer R package"> + <option value="old">Old</option> + <option value="boyd">R Package</option> + </param> + </inputs> <outputs> <data format="html" name="out_file" /> |