Mercurial > repos > davidvanzessen > mutation_analysis
changeset 110:ade5cf6fd2dc draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 02 Aug 2016 08:30:23 -0400 |
parents | 0096cd454380 |
children | de3331cf6368 |
files | aa_histogram.r merge_and_filter.r mutation_analysis.py mutation_analysis.r mutation_analysis.xml style.tar.gz tmp/igat.r wrapper.sh |
diffstat | 8 files changed, 244 insertions(+), 138 deletions(-) [+] |
line wrap: on
line diff
--- a/aa_histogram.r Thu Jul 14 07:29:56 2016 -0400 +++ b/aa_histogram.r Tue Aug 02 08:30:23 2016 -0400 @@ -15,47 +15,47 @@ absent.aa.by.id = read.table(absent.aa.by.id.file, sep="\t", fill=T, header=T, quote="") for(gene in genes){ - - if(gene == ""){ - mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),] - absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),] - } else { - mutations.by.id.gene = mutations.by.id[grepl(paste("^", gene, sep=""), mutations.by.id$best_match),] - absent.aa.by.id.gene = absent.aa.by.id[grepl(paste("^", gene, sep=""), absent.aa.by.id$best_match),] - } - if(nrow(mutations.by.id.gene) == 0){ - next - } - - print(paste("nrow", gene, nrow(absent.aa.by.id.gene))) - - mutations.at.position = colSums(mutations.by.id.gene[,-c(1,2)]) - aa.at.position = colSums(absent.aa.by.id.gene[,-c(1,2,3,4)]) - dat_freq = mutations.at.position / aa.at.position - dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq) + if(gene == ""){ + mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),] + absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),] + } else { + mutations.by.id.gene = mutations.by.id[grepl(paste("^", gene, sep=""), mutations.by.id$best_match),] + absent.aa.by.id.gene = absent.aa.by.id[grepl(paste("^", gene, sep=""), absent.aa.by.id$best_match),] + } + print(paste("nrow", gene, nrow(absent.aa.by.id.gene))) + if(nrow(mutations.by.id.gene) == 0){ + next + } - print("---------------- plot ----------------") + mutations.at.position = colSums(mutations.by.id.gene[,-c(1,2)]) + aa.at.position = colSums(absent.aa.by.id.gene[,-c(1,2,3,4)]) + + dat_freq = mutations.at.position / aa.at.position + dat_freq[is.na(dat_freq)] = 0 + dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq) + + print("---------------- plot ----------------") - m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) - m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=1:length(dat_freq), labels=1:length(dat_freq)) - m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1") - m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1") - m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2") - m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2") - m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3") - m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(paste(gene, "AA mutation frequency")) + m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=dat_dt$i, labels=dat_dt$i) + m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1") + m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1") + m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2") + m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2") + m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3") + m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(paste(gene, "AA mutation frequency")) + + print("---------------- write/print ----------------") - print("---------------- write/print ----------------") - - png(filename=paste(outdir, "/aa_histogram_", gene, ".png", sep=""), width=1280, height=720) - print(m) - dev.off() - - dat.sums = data.frame(index=1:length(mutations.at.position), mutations.at.position=mutations.at.position, aa.at.position=aa.at.position) - - write.table(dat.sums, paste(outdir, "/aa_histogram_sum_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) - write.table(mutations.by.id.gene, paste(outdir, "/aa_histogram_count_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) - write.table(absent.aa.by.id.gene, paste(outdir, "/aa_histogram_absent_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) - write.table(dat_dt, paste(outdir, "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) + png(filename=paste(outdir, "/aa_histogram_", gene, ".png", sep=""), width=1280, height=720) + print(m) + dev.off() + + dat.sums = data.frame(index=1:length(mutations.at.position), mutations.at.position=mutations.at.position, aa.at.position=aa.at.position) + + write.table(dat.sums, paste(outdir, "/aa_histogram_sum_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) + write.table(mutations.by.id.gene, paste(outdir, "/aa_histogram_count_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) + write.table(absent.aa.by.id.gene, paste(outdir, "/aa_histogram_absent_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) + write.table(dat_dt, paste(outdir, "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) }
--- a/merge_and_filter.r Thu Jul 14 07:29:56 2016 -0400 +++ b/merge_and_filter.r Tue Aug 02 08:30:23 2016 -0400 @@ -34,13 +34,24 @@ } -print(paste("Number of sequences in summary file:", nrow(summ))) +input.sequence.count = nrow(summ) +print(paste("Number of sequences in summary file:", input.sequence.count)) + +filtering.steps = data.frame(character(0), numeric(0)) + +filtering.steps = rbind(filtering.steps, c("Input", input.sequence.count)) + +filtering.steps[,1] = as.character(filtering.steps[,1]) +filtering.steps[,2] = as.character(filtering.steps[,2]) +#filtering.steps[,3] = as.numeric(filtering.steps[,3]) summ = merge(summ, gene_identification, by="Sequence.ID") summ = summ[summ$Functionality != "No results",] -print(paste("Number of sequences after merging with annotation:", nrow(summ))) +print(paste("Number of sequences after 'No results' filter:", nrow(summ))) + +filtering.steps = rbind(filtering.steps, c("After 'No results' filter", nrow(summ))) if(functionality == "productive"){ summ = summ[summ$Functionality == "productive (see comment)" | summ$Functionality == "productive",] @@ -52,6 +63,8 @@ print(paste("Number of sequences after productive filter:", nrow(summ))) +filtering.steps = rbind(filtering.steps, c("After productive filter", nrow(summ))) + splt = strsplit(class_filter, "_")[[1]] chunk_hit_threshold = as.numeric(splt[1]) nt_hit_threshold = as.numeric(splt[2]) @@ -70,7 +83,6 @@ if(any(higher_than, na.rm=T)){ #summ = summ[higher_than,] } -print(paste("Number of matched sequences:", sum(!grepl("^unmatched", summ$best_match)))) if(nrow(summ) == 0){ stop("No data remaining after filter") @@ -109,6 +121,8 @@ print(paste("Number of sequences in result after", unique_type, "filtering:", nrow(result))) +filtering.steps = rbind(filtering.steps, c("After duplicate filter", nrow(result))) + #remove the sequences that have an 'n' (or 'N') in the FR2, FR3, CDR1 and CDR2 regions. sequences = sequences[,c("Sequence.ID", "FR1.IMGT", "CDR1.IMGT", "FR2.IMGT", "CDR2.IMGT", "FR3.IMGT", "CDR3.IMGT")] names(sequences) = c("Sequence.ID", "FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq") @@ -124,10 +138,12 @@ result = result[result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] print(paste("Number of sequences after empty CDR1, FR2, CDR2 and FR3 column filter:", nrow(result))) +filtering.steps = rbind(filtering.steps, c("After empty CDR1, FR2, CDR2, FR3 filter", nrow(result))) result = result[!(grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] print(paste("Number of sequences in result after n filtering:", nrow(result))) +filtering.steps = rbind(filtering.steps, c("After N filter", nrow(result))) cleanup_columns = c("FR1.IMGT.Nb.of.mutations", "CDR1.IMGT.Nb.of.mutations", @@ -176,8 +192,20 @@ print(paste("Number of sequences in result after CDR/FR filtering:", nrow(result))) print(paste("Number of matched sequences in result after CDR/FR filtering:", nrow(result[!grepl("unmatched", result$best_match),]))) +filtering.steps = rbind(filtering.steps, c("After unique filter", nrow(result))) + print(paste("Number of rows in result:", nrow(result))) print(paste("Number of rows in unmatched:", nrow(unmatched))) +matched.sequences.count = sum(!grepl("^unmatched", result$best_match)) +unmatched.sequences.count = sum(grepl("^unmatched", result$best_match)) + +filtering.steps = rbind(filtering.steps, c("Number of matched sequences", matched.sequences.count)) +filtering.steps = rbind(filtering.steps, c("Number of unmatched sequences", unmatched.sequences.count)) +filtering.steps[,2] = as.numeric(filtering.steps[,2]) +filtering.steps$perc = round(filtering.steps[,2] / input.sequence.count * 100, 2) + +write.table(x=filtering.steps, file=gsub("unmatched", "filtering_steps", unmatchedfile), sep="\t",quote=F,row.names=F,col.names=F) + write.table(x=result, file=output, sep="\t",quote=F,row.names=F,col.names=T) write.table(x=unmatched, file=unmatchedfile, sep="\t",quote=F,row.names=F,col.names=T)
--- a/mutation_analysis.py Thu Jul 14 07:29:56 2016 -0400 +++ b/mutation_analysis.py Tue Aug 02 08:30:23 2016 -0400 @@ -22,6 +22,7 @@ mutationdic = dict() mutationMatcher = re.compile("^(.)(\d+).(.),?(.)?(\d+)?.?(.)?(.?.?.?.?.?)?") +NAMatchResult = (None, None, None, None, None, None, '') linecount = 0 IDIndex = 0 @@ -58,15 +59,19 @@ ID = linesplt[IDIndex] genedic[ID] = linesplt[best_matchIndex] try: - mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else [] - mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] - mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] - mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] + if linesplt[fr1Index] != "NA": + mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else [] + else: + mutationdic[ID + "_FR1"] = [] + mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] if linesplt[cdr1Index] != "NA" else [] + mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] if linesplt[fr2Index] != "NA" else [] + mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] if linesplt[cdr2Index] != "NA" else [] mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] - mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] - except: + mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] if linesplt[fr3Index] != "NA" else [] + except e: print linesplt print linecount + print e mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] @@ -79,6 +84,8 @@ IDlist += [ID] AALength = (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[4]) + 1) # [4] is the position of the AA mutation, None if silent +if AALength < 60: + AALength = 64 AA_mutation = [0] * AALength AA_mutation_dic = {"ca": AA_mutation[:], "cg": AA_mutation[:], "cm": AA_mutation[:], "un": AA_mutation[:]}
--- a/mutation_analysis.r Thu Jul 14 07:29:56 2016 -0400 +++ b/mutation_analysis.r Tue Aug 02 08:30:23 2016 -0400 @@ -307,26 +307,28 @@ write.table(x=result, file=paste("mutations_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=F) } +sum.table = read.table("mutations_sum.txt", sep=",", header=F) +median.table = read.table("mutations_median.txt", sep=",", header=F) + +#sum.table["Median of Number of Mutations (%)",] = median.table[1,] + +new.table = sum.table[1,] +new.table[2,] = median.table[1,] +new.table[3:12,] = sum.table[2:11,] +new.table[,1] = as.character(new.table[,1]) +new.table[2,1] = "Median of Number of Mutations (%)" + +#sum.table = sum.table[c("Number of Mutations (%)", "Median of Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR"),] + +write.table(x=new.table, file="mutations_sum.txt", sep=",",quote=F,row.names=F,col.names=F) + + if (!("ggplot2" %in% rownames(installed.packages()))) { install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") } -genesForPlot = gsub("[0-9]", "", dat$best_match) -genesForPlot = data.frame(table(genesForPlot)) -colnames(genesForPlot) = c("Gene","Freq") -genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) -write.table(genesForPlot, "all.txt", sep="\t",quote=F,row.names=F,col.names=T) - - -pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) -pc = pc + geom_bar(width = 1, stat = "identity") -pc = pc + coord_polar(theta="y") -pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("Classes", "( n =", sum(genesForPlot$Freq), ")")) - -png(filename="all.png") -pc -dev.off() +dat = dat[!grepl("^unmatched", dat$best_match),] #blegh genesForPlot = dat[grepl("ca", dat$best_match),]$best_match @@ -377,11 +379,6 @@ write.table(dat, input, sep="\t",quote=F,row.names=F,col.names=T) - - - - - dat$best_match_class = substr(dat$best_match, 0, 2) freq_labels = c("0", "0-2", "2-5", "5-10", "10-15", "15-20", "20") dat$frequency_bins = cut(dat$percentage_mutations, breaks=c(-Inf, 0, 2,5,10,15,20, Inf), labels=freq_labels)
--- a/mutation_analysis.xml Thu Jul 14 07:29:56 2016 -0400 +++ b/mutation_analysis.xml Tue Aug 02 08:30:23 2016 -0400 @@ -5,7 +5,7 @@ </command> <inputs> <param name="in_file" type="data" label="IMGT zip file to be analysed" /> - <param name="method" type="select" label="Identification method" help="" > + <param name="method" type="select" label="Method of identification of C region" help="" > <option value="custom" selected="true">custom</option> <option value="blastn">blastn</option> </param> @@ -27,10 +27,10 @@ <option value="no" selected="true">No</option> </param> <param name="unique" type="select" label="Remove duplicates based on" help="" > - <option value="AA.JUNCTION_V_subclass" selected="true">AA.JUNCTION + V + subclass</option> - <option value="AA.JUNCTION_subclass">AA.JUNCTION + subclass</option> - <option value="AA.JUNCTION_V">AA.JUNCTION + V</option> - <option value="AA.JUNCTION">AA.JUNCTION</option> + <option value="AA.JUNCTION_V_subclass" selected="true">Top.V.Gene, CDR3.Seq, C region</option> + <option value="AA.JUNCTION_subclass">CDR3.Seq + C region</option> + <option value="AA.JUNCTION_V">CDR3.seq + Top.V.Gene</option> + <option value="AA.JUNCTION">CDR3.seq</option> <option value="none">Don't remove duplicates</option> </param> <param name="class_filter" type="select" label="Class/Subclass filter" help="" >
--- a/tmp/igat.r Thu Jul 14 07:29:56 2016 -0400 +++ b/tmp/igat.r Tue Aug 02 08:30:23 2016 -0400 @@ -15,9 +15,9 @@ for(f in list.files(imgt.dir, pattern="*.txt$")){ #print(paste("filtering", f)) path = paste(imgt.dir, f, sep="") - dat = read.table(path, header=T, sep="\t", fill=T, quote="", stringsAsFactors=F) + dat = read.table(path, header=T, sep="\t", fill=T, quote="", stringsAsFactors=F, check.names=FALSE) - dat = dat[dat$Sequence.ID %in% merged$Sequence.ID,] + dat = dat[dat[,"Sequence ID"] %in% merged$Sequence.ID,] if(nrow(dat) > 0 & grepl("^8_", f)){ #change the FR1 columns to 0 in the "8_..." file dat[,grepl("^FR1", names(dat))] = 0
--- a/wrapper.sh Thu Jul 14 07:29:56 2016 -0400 +++ b/wrapper.sh Tue Aug 02 08:30:23 2016 -0400 @@ -1,5 +1,5 @@ #!/bin/bash -set -e +#set -e dir="$(cd "$(dirname "$0")" && pwd)" input=$1 method=$2 @@ -17,13 +17,13 @@ class_filter=${13} mkdir $outdir +tar -xzf $dir/style.tar.gz -C $outdir + echo "---------------- read parameters ----------------" echo "---------------- read parameters ----------------<br />" > $log echo "unpacking IMGT file" - - type="`file $input`" if [[ "$type" == *"Zip archive"* ]] ; then echo "Zip archive" @@ -67,7 +67,11 @@ sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l) sequence_index=$((sequence_index+1)) - cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta + cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.tmp + + cat $PWD/sequences.tmp | grep -B1 -vE "^$" sequences.fasta #filter out empty sequences + + rm $PWD/sequences.tmp echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt @@ -149,12 +153,15 @@ echo "---------------- aa_histogram.r ----------------<br />" >> $log Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "ca,cg,cm" $outdir/ 2>&1 -mv $outdir/aa_histogram_.png $outdir/aa_histogram.png -mv $outdir/aa_histogram_.txt $outdir/aa_histogram.txt +if [ -e "$outdir/aa_histogram_.png" ]; then + mv $outdir/aa_histogram_.png $outdir/aa_histogram.png + mv $outdir/aa_histogram_.txt $outdir/aa_histogram.txt +fi genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) funcs=(sum mean median) +funcs=(sum) echo "---------------- sequence_overview.r ----------------" echo "---------------- sequence_overview.r ----------------<br />" >> $log @@ -172,6 +179,10 @@ done < $outdir/sequence_overview/ntoverview.txt echo "<html><center><h1>$title</h1></center>" > $output +echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>" >> $output +echo "<script type='text/javascript' src='tabber.js'></script>" >> $output +echo "<script type='text/javascript' src='script.js'></script>" >> $output +echo "<link rel='stylesheet' type='text/css' href='style.css'>" >> $output #display the matched/unmatched for clearity @@ -188,6 +199,10 @@ echo "---------------- main tables ----------------" echo "---------------- main tables ----------------<br />" >> $log + +echo "<div class='tabber'>" >> $output +echo "<div class='tabbertab' title='SHM Overview'>" >> $output + for func in ${funcs[@]} do @@ -221,9 +236,84 @@ #echo "<a href='data_${func}.txt'>Download data</a>" >> $output done -echo "---------------- download links ----------------" -echo "---------------- download links ----------------<br />" >> $log +echo "</div>" >> $output #SHM overview tab end + +echo "---------------- images ----------------" +echo "---------------- images ----------------<br />" >> $log + +echo "<div class='tabbertab' title='SHM Frequency'>" >> $output + +if [ -a $outdir/scatter.png ] +then + echo "<img src='scatter.png'/><br />" >> $output + echo "<a href='scatter.txt'>download data</a><br />" >> $output +fi +if [ -a $outdir/frequency_ranges.png ] +then + echo "<img src='frequency_ranges.png'/><br />" >> $output + echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output + echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output +fi + +echo "</div>" >> $output #SHM frequency tab end + +echo "<div class='tabbertab' title='Transition tables'>" >> $output + +for gene in ${genes[@]} +do + echo "<table border='1'><caption>$gene transition table</caption>" >> $output + while IFS=, read from a c g t + do + echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output + done < $outdir/transitions_${gene}_sum.txt + echo "</table>" >> $output +done +echo "<table border='1'><caption>All transition table</caption>" >> $output +while IFS=, read from a c g t + do + echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output +done < $outdir/transitions_all_sum.txt +echo "</table>" >> $output + +echo "</div>" >> $output #transition tables tab end + +echo "<div class='tabbertab' title='Antigen Selection'>" >> $output + +if [ -a $outdir/aa_histogram.png ] +then + echo "<img src='aa_histogram.png'/><br />" >> $output + echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output + echo "<img src='aa_histogram_ca.png'/><br />" >> $output + echo "<a href='aa_histogram_ca.txt'>download data</a><br />" >> $output + echo "<img src='aa_histogram_cg.png'/><br />" >> $output + echo "<a href='aa_histogram_cg.txt'>download data</a><br />" >> $output + echo "<img src='aa_histogram_cm.png'/><br />" >> $output + echo "<a href='aa_histogram_cm.txt'>download data</a><br />" >> $output +fi + +echo "<embed src='baseline_ca.pdf' width='700px' height='1000px'>" >> $output +echo "<embed src='baseline_cg.pdf' width='700px' height='1000px'>" >> $output +echo "<embed src='baseline_cm.pdf' width='700px' height='1000px'>" >> $output + +echo "</div>" >> $output #antigen selection tab end + +echo "<div class='tabbertab' title='CSR'>" >> $output + +if [ -a $outdir/ca.png ] +then + echo "<img src='ca.png'/><br />" >> $output + echo "<a href='ca.txt'>download data</a><br />" >> $output +fi +if [ -a $outdir/cg.png ] +then + echo "<img src='cg.png'/><br />" >> $output + echo "<a href='cg.txt'>download data</a><br />" >> $output +fi + +echo "</div>" >> $output #CSR tab end + +echo "<div class='tabbertab' title='Downloads'>" >> $output echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output @@ -250,61 +340,9 @@ echo "<a href='new_IMGT_cg.txz'>Filtered cg IMGT zip</a><br />" >> $output echo "<a href='new_IMGT_cm.txz'>Filtered cm IMGT zip</a><br />" >> $output - -echo "---------------- images ----------------" -echo "---------------- images ----------------<br />" >> $log +echo "</div>" >> $output #downloads tab end -echo "<img src='all.png'/><br />" >> $output -echo "<a href='all.txt'>download data</a><br />" >> $output -if [ -a $outdir/ca.png ] -then - echo "<img src='ca.png'/><br />" >> $output - echo "<a href='ca.txt'>download data</a><br />" >> $output -fi -if [ -a $outdir/cg.png ] -then - echo "<img src='cg.png'/><br />" >> $output - echo "<a href='cg.txt'>download data</a><br />" >> $output -fi -if [ -a $outdir/scatter.png ] -then - echo "<img src='scatter.png'/><br />" >> $output - echo "<a href='scatter.txt'>download data</a><br />" >> $output -fi -if [ -a $outdir/frequency_ranges.png ] -then - echo "<img src='frequency_ranges.png'/><br />" >> $output - echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output - echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output -fi -if [ -a $outdir/aa_histogram.png ] -then - echo "<img src='aa_histogram.png'/><br />" >> $output - echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output - echo "<img src='aa_histogram_ca.png'/><br />" >> $output - echo "<a href='aa_histogram_ca.txt'>download data</a><br />" >> $output - echo "<img src='aa_histogram_cg.png'/><br />" >> $output - echo "<a href='aa_histogram_cg.txt'>download data</a><br />" >> $output - echo "<img src='aa_histogram_cm.png'/><br />" >> $output - echo "<a href='aa_histogram_cm.txt'>download data</a><br />" >> $output -fi - -for gene in ${genes[@]} -do - echo "<table border='1'><caption>$gene transition table</caption>" >> $output - while IFS=, read from a c g t - do - echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output - done < $outdir/transitions_${gene}_sum.txt - echo "</table>" >> $output -done - -echo "<table border='1'><caption>All transition table</caption>" >> $output -while IFS=, read from a c g t - do - echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output -done < $outdir/transitions_all_sum.txt -echo "</table>" >> $output +echo "</div>" >> $output #tabs end echo "</html>" >> $output @@ -368,8 +406,44 @@ mv $log $outdir/log.html -cp $outdir/index.html $log +echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />" > $log +echo "<table border = 1>" >> $log +echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>" >> $log +tIFS="$TMP" +IFS=$'\t' +while read step seq perc + do + echo "<tr>" >> $log + echo "<td>$step</td>" >> $log + echo "<td>$seq</td>" >> $log + echo "<td>${perc}%</td>" >> $log + echo "</tr>" >> $log +done < $outdir/filtering_steps.txt +echo "</table border></center></html>" >> $log + +IFS="$tIFS" + echo "---------------- Done! ----------------" echo "---------------- Done! ----------------<br />" >> $outdir/log.html + + + + + + + + + + + + + + + + + + + +