# HG changeset patch
# User davidvanzessen
# Date 1470926152 14400
# Node ID 626a956f3811a7cdf646512b664bbc8169da8481
# Parent ad7ca9c2b748d6aebc933aeb32ba9ded7c8dd05a
Uploaded
diff -r ad7ca9c2b748 -r 626a956f3811 mutation_analysis.r
--- a/mutation_analysis.r Thu Aug 11 08:00:00 2016 -0400
+++ b/mutation_analysis.r Thu Aug 11 10:35:52 2016 -0400
@@ -276,17 +276,28 @@
transition2[is.na(transition2$value),]$value = 0
- png(filename=paste("transitions_stacked_", name, ".png", sep=""))
- p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity") #stacked bar
- p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL))
- print(p)
- dev.off()
-
- png(filename=paste("transitions_heatmap_", name, ".png", sep=""))
- p = ggplot(transition2, aes(factor(reorder(id, order.x)), factor(reorder(variable, order.y)))) + geom_tile(aes(fill = value), colour="white") + scale_fill_gradient(low="white", high="steelblue") #heatmap
- p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base")
- print(p)
- dev.off()
+ if(!all(transition2$value == 0)){ #having rows of data but a transition table filled with 0 is bad
+
+ print("Plotting stacked transition")
+
+ print(transition2)
+
+ png(filename=paste("transitions_stacked_", name, ".png", sep=""))
+ p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity") #stacked bar
+ p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL))
+ print(p)
+ dev.off()
+
+ print("Plotting heatmap transition")
+
+ png(filename=paste("transitions_heatmap_", name, ".png", sep=""))
+ p = ggplot(transition2, aes(factor(reorder(id, order.x)), factor(reorder(variable, order.y)))) + geom_tile(aes(fill = value), colour="white") + scale_fill_gradient(low="white", high="steelblue") #heatmap
+ p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base")
+ print(p)
+ dev.off()
+ } else {
+ print("No data to plot")
+ }
}
#print(paste("writing value file: ", name, "_", fname, "_value.txt" ,sep=""))
diff -r ad7ca9c2b748 -r 626a956f3811 mutation_analysis.xml
--- a/mutation_analysis.xml Thu Aug 11 08:00:00 2016 -0400
+++ b/mutation_analysis.xml Thu Aug 11 10:35:52 2016 -0400
@@ -20,10 +20,8 @@
-
-
-
-
+
+
@@ -40,9 +38,9 @@
-
+
-
+
diff -r ad7ca9c2b748 -r 626a956f3811 summary_to_fasta.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/summary_to_fasta.py Thu Aug 11 10:35:52 2016 -0400
@@ -0,0 +1,42 @@
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--input", help="The 1_Summary file of an IMGT zip file")
+parser.add_argument("--fasta", help="The output fasta file")
+
+args = parser.parse_args()
+
+infile = args.input
+fasta = args.fasta
+
+with open(infile, 'r') as i, open(fasta, 'w') as o:
+ first = True
+ id_col = 0
+ seq_col = 0
+ no_results = 0
+ no_seqs = 0
+ passed = 0
+ for line in i:
+ splt = line.split("\t")
+ if first:
+ id_col = splt.index("Sequence ID")
+ seq_col = splt.index("Sequence")
+ first = False
+ continue
+ if len(splt) < 5:
+ no_results += 1
+ continue
+
+ ID = splt[id_col]
+ seq = splt[seq_col]
+
+ if not len(seq) > 0:
+ no_seqs += 1
+ continue
+
+ o.write(">" + ID + "\n" + seq + "\n")
+ passed += 1
+
+ print "No results:", no_results
+ print "No sequences:", no_seqs
+ print "Written to fasta file:", passed
diff -r ad7ca9c2b748 -r 626a956f3811 wrapper.sh
--- a/wrapper.sh Thu Aug 11 08:00:00 2016 -0400
+++ b/wrapper.sh Thu Aug 11 10:35:52 2016 -0400
@@ -53,9 +53,12 @@
#cat $PWD/files/*/8_* > $PWD/mutationstats.txt
#cat $PWD/files/*/10_* > $PWD/hotspots.txt
-#BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
-
-echo "${BLASTN_DIR}"
+if [[ ${#BLASTN_DIR} -ge 5 ]] ; then
+ echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}"
+else
+ BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin"
+ echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}"
+fi
echo "---------------- identification ($method) ----------------"
echo "---------------- identification ($method) ----------------
" >> $log
@@ -63,16 +66,24 @@
if [[ "${method}" == "custom" ]] ; then
python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
else
- ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
- ID_index=$((ID_index+1))
- sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
- sequence_index=$((sequence_index+1))
+ #ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
+ #ID_index=$((ID_index+1))
+ #sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
+ #sequence_index=$((sequence_index+1))
+
+ #echo "${ID_index}, ${sequence_index}"
- cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.tmp
+ #cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.tmp
+ #cat $PWD/summary.txt | tail -n+2 | awk -v id="${ID_index}" -v seq="${sequence_index}" 'BEGIN{FS="\t"} if(NF>10 && length($seq) > 0) {print ">" $id "\n" $seq} {}' > $PWD/sequences.fasta
+
+ #cat $PWD/sequences.tmp | grep -B1 -vE ">.*|^$" | grep -v "^\-\-$" > sequences.fasta #filter out empty sequences
- cat $PWD/sequences.tmp | grep -B1 -vE "^$" sequences.fasta #filter out empty sequences
+ echo "---------------- summary_to_fasta.py ----------------"
+ echo "---------------- summary_to_fasta.py ----------------
" >> $log
- rm $PWD/sequences.tmp
+ python $dir/summary_to_fasta.py --input $PWD/summary.txt --fasta $PWD/sequences.fasta
+
+ #rm $PWD/sequences.tmp
echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt