comparison wrapper.sh @ 119:626a956f3811 draft

Uploaded
author davidvanzessen
date Thu, 11 Aug 2016 10:35:52 -0400
parents a8f91c52411c
children 613278c1bde0
comparison
equal deleted inserted replaced
118:ad7ca9c2b748 119:626a956f3811
51 #cat $PWD/files/*/6_* > $PWD/junction.txt 51 #cat $PWD/files/*/6_* > $PWD/junction.txt
52 #cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt 52 #cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
53 #cat $PWD/files/*/8_* > $PWD/mutationstats.txt 53 #cat $PWD/files/*/8_* > $PWD/mutationstats.txt
54 #cat $PWD/files/*/10_* > $PWD/hotspots.txt 54 #cat $PWD/files/*/10_* > $PWD/hotspots.txt
55 55
56 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin" 56 if [[ ${#BLASTN_DIR} -ge 5 ]] ; then
57 57 echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}"
58 echo "${BLASTN_DIR}" 58 else
59 BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin"
60 echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}"
61 fi
59 62
60 echo "---------------- identification ($method) ----------------" 63 echo "---------------- identification ($method) ----------------"
61 echo "---------------- identification ($method) ----------------<br />" >> $log 64 echo "---------------- identification ($method) ----------------<br />" >> $log
62 65
63 if [[ "${method}" == "custom" ]] ; then 66 if [[ "${method}" == "custom" ]] ; then
64 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt 67 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
65 else 68 else
66 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) 69 #ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
67 ID_index=$((ID_index+1)) 70 #ID_index=$((ID_index+1))
68 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l) 71 #sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
69 sequence_index=$((sequence_index+1)) 72 #sequence_index=$((sequence_index+1))
70 73
71 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.tmp 74 #echo "${ID_index}, ${sequence_index}"
72 75
73 cat $PWD/sequences.tmp | grep -B1 -vE "^$" sequences.fasta #filter out empty sequences 76 #cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.tmp
74 77 #cat $PWD/summary.txt | tail -n+2 | awk -v id="${ID_index}" -v seq="${sequence_index}" 'BEGIN{FS="\t"} if(NF>10 && length($seq) > 0) {print ">" $id "\n" $seq} {}' > $PWD/sequences.fasta
75 rm $PWD/sequences.tmp 78
79 #cat $PWD/sequences.tmp | grep -B1 -vE ">.*|^$" | grep -v "^\-\-$" > sequences.fasta #filter out empty sequences
80
81 echo "---------------- summary_to_fasta.py ----------------"
82 echo "---------------- summary_to_fasta.py ----------------<br />" >> $log
83
84 python $dir/summary_to_fasta.py --input $PWD/summary.txt --fasta $PWD/sequences.fasta
85
86 #rm $PWD/sequences.tmp
76 87
77 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt 88 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
78 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt 89 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
79 fi 90 fi
80 91