diff wrapper.sh @ 119:626a956f3811 draft

Uploaded
author davidvanzessen
date Thu, 11 Aug 2016 10:35:52 -0400
parents a8f91c52411c
children 613278c1bde0
line wrap: on
line diff
--- a/wrapper.sh	Thu Aug 11 08:00:00 2016 -0400
+++ b/wrapper.sh	Thu Aug 11 10:35:52 2016 -0400
@@ -53,9 +53,12 @@
 #cat $PWD/files/*/8_* > $PWD/mutationstats.txt
 #cat $PWD/files/*/10_* > $PWD/hotspots.txt
 
-#BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
-
-echo "${BLASTN_DIR}"
+if [[ ${#BLASTN_DIR} -ge 5 ]] ; then
+	echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}"
+else
+	BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin"
+	echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}"
+fi
 
 echo "---------------- identification ($method) ----------------"
 echo "---------------- identification ($method) ----------------<br />" >> $log
@@ -63,16 +66,24 @@
 if [[ "${method}" == "custom" ]] ; then
 	python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
 else
-	ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
-	ID_index=$((ID_index+1))
-	sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
-	sequence_index=$((sequence_index+1))
+	#ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
+	#ID_index=$((ID_index+1))
+	#sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
+	#sequence_index=$((sequence_index+1))
+	
+	#echo "${ID_index}, ${sequence_index}"
 	
-	cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.tmp
+	#cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.tmp
+	#cat $PWD/summary.txt | tail -n+2 | awk -v id="${ID_index}" -v seq="${sequence_index}" 'BEGIN{FS="\t"} if(NF>10 && length($seq) > 0) {print ">" $id "\n" $seq} {}' > $PWD/sequences.fasta
+	
+	#cat $PWD/sequences.tmp | grep -B1 -vE ">.*|^$" | grep -v "^\-\-$" > sequences.fasta #filter out empty sequences
 	
-	cat $PWD/sequences.tmp | grep -B1 -vE "^$" sequences.fasta #filter out empty sequences
+	echo "---------------- summary_to_fasta.py ----------------"
+	echo "---------------- summary_to_fasta.py ----------------<br />" >> $log
 	
-	rm $PWD/sequences.tmp
+	python $dir/summary_to_fasta.py --input $PWD/summary.txt --fasta $PWD/sequences.fasta
+	
+	#rm $PWD/sequences.tmp
 	
 	echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
 	${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt