argalaxy_tools: wrapper.sh comparison

comparison wrapper.sh @ 11:0510cf1f7cbc draft

Uploaded

author	davidvanzessen
date	Tue, 04 Aug 2015 09:59:26 -0400
parents
children

comparison

equal deleted inserted replaced

-:edbf4fba5fc7
+:0510cf1f7cbc
+#!/bin/bash
+set -e
+dir="$(cd "$(dirname "$0")" && pwd)"
+input=$1
+method=$2
+output=$3
+outdir=$4
+title=$5
+include_fr1=$6
+functionality=$7
+unique=$8
+mkdir $outdir
+type="`file $input`"
+if [[ "$type" == *"Zip archive"* ]] ; then
+	echo "Zip archive"
+	echo "unzip $input -d $PWD/files/"
+	unzip $input -d $PWD/files/
+elif [[ "$type" == *"XZ compressed data"* ]] ; then
+	echo "ZX archive"
+	echo "tar -xJf $input -C $PWD/files/"
+	mkdir -p $PWD/files/$title
+	tar -xJf $input -C $PWD/files/$title
+fi
+cat $PWD/files/*/1_* > $PWD/summary.txt
+cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
+cat $PWD/files/*/8_* > $PWD/mutationstats.txt
+cat $PWD/files/*/10_* > $PWD/hotspots.txt
+#BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
+echo "${BLASTN_DIR}"
+echo "identification ($method)"
+if [[ "${method}" == "custom" ]] ; then
+	python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
+else
+	ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
+	ID_index=$((ID_index+1))
+	sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
+	sequence_index=$((sequence_index+1))
+	echo "$ID_index ${sequence_index}"
+	cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
+	echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
+	${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
+fi
+echo "merging"
+Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique
+genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
+echo "R mutation analysis"
+Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
+echo "python mutation analysis"
+python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
+echo "R AA histogram"
+Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
+cat $outdir/mutations.txt $outdir/hotspot_analysis.txt > $outdir/result.txt
+genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
+echo "<html><center><h1>$title</h1></center><table border='1'>" > $output
+echo "<tr><th>info</th>" >> $output
+for gene in ${genes[@]}
+do
+	tmp=`cat $outdir/${gene}_n.txt`
+	echo "<th><a href='matched_${gene}.txt'>${gene} (N = $tmp)</a></th>" >> $output
+done
+tmp=`cat $outdir/total_n.txt`
+echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output
+while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
+do
+	if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
+		echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
+	else
+		echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
+	fi
+done < $outdir/result.txt
+echo "</table>" >> $output
+echo "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br /><a href='merged.txt'>all data</a><br />" >> $output
+echo "<img src='all.png'/><br />" >> $output
+echo "<a href='all.txt'>download data</a><br />" >> $output
+if [ -a $outdir/ca.png ]
+then
+	echo "<img src='ca.png'/><br />" >> $output
+	echo "<a href='ca.txt'>download data</a><br />" >> $output
+fi
+if [ -a $outdir/cg.png ]
+then
+	echo "<img src='cg.png'/><br />" >> $output
+	echo "<a href='cg.txt'>download data</a><br />" >> $output
+fi
+if [ -a $outdir/scatter.png ]
+then
+	echo "<img src='scatter.png'/><br />" >> $output
+	echo "<a href='scatter.txt'>download data</a><br />" >> $output
+fi
+if [ -a $outdir/aa_histogram.png ]
+then
+	echo "<img src='aa_histogram.png'/><br />" >> $output
+	echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
+fi
+for gene in ${genes[@]}
+do
+	echo "<table border='1'><caption>$gene transition table</caption>" >> $output
+	while IFS=, read from a c g t
+		do
+			echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
+	done < $outdir/transitions_${gene}.txt
+	echo "</table>" >> $output
+done
+echo "<table border='1'><caption>All transition table</caption>" >> $output
+while IFS=, read from a c g t
+	do
+		echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
+done < $outdir/transitions.txt
+echo "</table>" >> $output
+echo "</html>" >> $output
+#rm $outdir/HS12RSS.txt
+#rm $outdir/HS23RSS.txt

Mercurial > repos > davidvanzessen > argalaxy_tools

comparison wrapper.sh @ 11:0510cf1f7cbc draft