Mercurial > repos > davidvanzessen > mutation_analysis

#!/bin/bash
set -e
dir="$(cd "$(dirname "$0")" && pwd)"
input=$1
method=$2
output=$3
outdir=$4
title=$5
mkdir $outdir

unzip $input -d $PWD/files/ > $PWD/unziplog.log
cat $PWD/files/*/1_* > $PWD/summary.txt
cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
cat $PWD/files/*/8_* > $PWD/mutationstats.txt
cat $PWD/files/*/10_* > $PWD/hotspots.txt

#BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"

echo "${BLASTN_DIR}"


echo "identification ($method)"

if [[ "${method}" == "custom" ]] ; then
	python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
else
	ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
	ID_index=$((ID_index+1))
	sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
	sequence_index=$((sequence_index+1))

	echo "$ID_index ${sequence_index}"

	cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta

	echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
	${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
fi


echo "merging"
Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method

genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
echo "R mutation analysis"
Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir 2>&1
echo "python mutation analysis"
python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --output $outdir/hotspot_analysis.txt

cat $outdir/mutations.txt $outdir/hotspot_analysis.txt > $outdir/result.txt

genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)


echo "<html><center><h1>$title</h1></center><table border='1'>" > $output
echo "<tr><th>info</th>" >> $output
for gene in ${genes[@]}
do
	tmp=`cat $outdir/${gene}_n.txt`
	echo "<th><a href='matched_${gene}.txt'>${gene} (N = $tmp)</a></th>" >> $output
done
tmp=`cat $outdir/total_n.txt`
echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output

while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
	do
		echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
done < $outdir/result.txt
echo "</table>" >> $output
echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output


echo "<img src='all.png'/><br />" >> $output
if [ -a $outdir/ca.png ]
then
	echo "<img src='ca.png'/><br />" >> $output
fi
if [ -a $outdir/cg.png ]
then
	echo "<img src='cg.png'/><br />" >> $output
fi

for gene in ${genes[@]}
do
	echo "<table border='1'><caption>$gene transition table</caption>" >> $output
	while IFS=, read from a c g t
		do
			echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
	done < $outdir/transitions_${gene}.txt
	echo "</table>" >> $output
done

echo "<table border='1'><caption>All transition table</caption>" >> $output
while IFS=, read from a c g t
	do
		echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
done < $outdir/transitions.txt
echo "</table>" >> $output

echo "</html>" >> $output

#rm $outdir/HS12RSS.txt
#rm $outdir/HS23RSS.txt
author	davidvanzessen
date	Wed, 01 Apr 2015 05:26:52 -0400
parents	c518cf0d4adb
children	c9f9623f1f76