# HG changeset patch # User davidvanzessen # Date 1470926152 14400 # Node ID 626a956f3811a7cdf646512b664bbc8169da8481 # Parent ad7ca9c2b748d6aebc933aeb32ba9ded7c8dd05a Uploaded diff -r ad7ca9c2b748 -r 626a956f3811 mutation_analysis.r --- a/mutation_analysis.r Thu Aug 11 08:00:00 2016 -0400 +++ b/mutation_analysis.r Thu Aug 11 10:35:52 2016 -0400 @@ -276,17 +276,28 @@ transition2[is.na(transition2$value),]$value = 0 - png(filename=paste("transitions_stacked_", name, ".png", sep="")) - p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity") #stacked bar - p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL)) - print(p) - dev.off() - - png(filename=paste("transitions_heatmap_", name, ".png", sep="")) - p = ggplot(transition2, aes(factor(reorder(id, order.x)), factor(reorder(variable, order.y)))) + geom_tile(aes(fill = value), colour="white") + scale_fill_gradient(low="white", high="steelblue") #heatmap - p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") - print(p) - dev.off() + if(!all(transition2$value == 0)){ #having rows of data but a transition table filled with 0 is bad + + print("Plotting stacked transition") + + print(transition2) + + png(filename=paste("transitions_stacked_", name, ".png", sep="")) + p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity") #stacked bar + p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL)) + print(p) + dev.off() + + print("Plotting heatmap transition") + + png(filename=paste("transitions_heatmap_", name, ".png", sep="")) + p = ggplot(transition2, aes(factor(reorder(id, order.x)), factor(reorder(variable, order.y)))) + geom_tile(aes(fill = value), colour="white") + scale_fill_gradient(low="white", high="steelblue") #heatmap + p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + print(p) + dev.off() + } else { + print("No data to plot") + } } #print(paste("writing value file: ", name, "_", fname, "_value.txt" ,sep="")) diff -r ad7ca9c2b748 -r 626a956f3811 mutation_analysis.xml --- a/mutation_analysis.xml Thu Aug 11 08:00:00 2016 -0400 +++ b/mutation_analysis.xml Thu Aug 11 10:35:52 2016 -0400 @@ -20,10 +20,8 @@ - - - - + + @@ -40,9 +38,9 @@ - + - + diff -r ad7ca9c2b748 -r 626a956f3811 summary_to_fasta.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/summary_to_fasta.py Thu Aug 11 10:35:52 2016 -0400 @@ -0,0 +1,42 @@ +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("--input", help="The 1_Summary file of an IMGT zip file") +parser.add_argument("--fasta", help="The output fasta file") + +args = parser.parse_args() + +infile = args.input +fasta = args.fasta + +with open(infile, 'r') as i, open(fasta, 'w') as o: + first = True + id_col = 0 + seq_col = 0 + no_results = 0 + no_seqs = 0 + passed = 0 + for line in i: + splt = line.split("\t") + if first: + id_col = splt.index("Sequence ID") + seq_col = splt.index("Sequence") + first = False + continue + if len(splt) < 5: + no_results += 1 + continue + + ID = splt[id_col] + seq = splt[seq_col] + + if not len(seq) > 0: + no_seqs += 1 + continue + + o.write(">" + ID + "\n" + seq + "\n") + passed += 1 + + print "No results:", no_results + print "No sequences:", no_seqs + print "Written to fasta file:", passed diff -r ad7ca9c2b748 -r 626a956f3811 wrapper.sh --- a/wrapper.sh Thu Aug 11 08:00:00 2016 -0400 +++ b/wrapper.sh Thu Aug 11 10:35:52 2016 -0400 @@ -53,9 +53,12 @@ #cat $PWD/files/*/8_* > $PWD/mutationstats.txt #cat $PWD/files/*/10_* > $PWD/hotspots.txt -#BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin" - -echo "${BLASTN_DIR}" +if [[ ${#BLASTN_DIR} -ge 5 ]] ; then + echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}" +else + BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin" + echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}" +fi echo "---------------- identification ($method) ----------------" echo "---------------- identification ($method) ----------------
" >> $log @@ -63,16 +66,24 @@ if [[ "${method}" == "custom" ]] ; then python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt else - ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) - ID_index=$((ID_index+1)) - sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l) - sequence_index=$((sequence_index+1)) + #ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) + #ID_index=$((ID_index+1)) + #sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l) + #sequence_index=$((sequence_index+1)) + + #echo "${ID_index}, ${sequence_index}" - cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.tmp + #cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.tmp + #cat $PWD/summary.txt | tail -n+2 | awk -v id="${ID_index}" -v seq="${sequence_index}" 'BEGIN{FS="\t"} if(NF>10 && length($seq) > 0) {print ">" $id "\n" $seq} {}' > $PWD/sequences.fasta + + #cat $PWD/sequences.tmp | grep -B1 -vE ">.*|^$" | grep -v "^\-\-$" > sequences.fasta #filter out empty sequences - cat $PWD/sequences.tmp | grep -B1 -vE "^$" sequences.fasta #filter out empty sequences + echo "---------------- summary_to_fasta.py ----------------" + echo "---------------- summary_to_fasta.py ----------------
" >> $log - rm $PWD/sequences.tmp + python $dir/summary_to_fasta.py --input $PWD/summary.txt --fasta $PWD/sequences.fasta + + #rm $PWD/sequences.tmp echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt