Mercurial > repos > davidvanzessen > mutation_analysis
diff wrapper.sh @ 119:626a956f3811 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 11 Aug 2016 10:35:52 -0400 |
parents | a8f91c52411c |
children | 613278c1bde0 |
line wrap: on
line diff
```diff
--- a/wrapper.sh	Thu Aug 11 08:00:00 2016 -0400
+++ b/wrapper.sh	Thu Aug 11 10:35:52 2016 -0400
@@ -53,9 +53,12 @@
 #cat $PWD/files/*/8_* > $PWD/mutationstats.txt
 #cat $PWD/files/*/10_* > $PWD/hotspots.txt
 
-#BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
-
-echo "${BLASTN_DIR}"
+if [[ ${#BLASTN_DIR} -ge 5 ]] ; then
+	echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}"
+else
+	BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin"
+	echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}"
+fi
 
 echo "---------------- identification ($method) ----------------"
 echo "---------------- identification ($method) ----------------<br />" >> $log
@@ -63,16 +66,24 @@
 if [[ "${method}" == "custom" ]] ; then
 	python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
 else
-	ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
-	ID_index=$((ID_index+1))
-	sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
-	sequence_index=$((sequence_index+1))
+	#ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
+	#ID_index=$((ID_index+1))
+	#sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
+	#sequence_index=$((sequence_index+1))
+
+	#echo "${ID_index}, ${sequence_index}"
 
-	cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.tmp
+	#cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.tmp
+	#cat $PWD/summary.txt | tail -n+2 | awk -v id="${ID_index}" -v seq="${sequence_index}" 'BEGIN{FS="\t"} if(NF>10 && length($seq) > 0) {print ">" $id "\n" $seq} {}' > $PWD/sequences.fasta
+
+	#cat $PWD/sequences.tmp | grep -B1 -vE ">.*|^$" | grep -v "^\-\-$" > sequences.fasta #filter out empty sequences
 
-	cat $PWD/sequences.tmp | grep -B1 -vE "^$" sequences.fasta #filter out empty sequences
+	echo "---------------- summary_to_fasta.py ----------------"
+	echo "---------------- summary_to_fasta.py ----------------<br />" >> $log
 
-	rm $PWD/sequences.tmp
+	python $dir/summary_to_fasta.py --input $PWD/summary.txt --fasta $PWD/sequences.fasta
+
+	#rm $PWD/sequences.tmp
 
 	echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
 	${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
```