Mercurial > repos > davidvanzessen > mutation_analysis
comparison wrapper.sh @ 119:626a956f3811 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 11 Aug 2016 10:35:52 -0400 |
parents | a8f91c52411c |
children | 613278c1bde0 |
comparison
equal
deleted
inserted
replaced
118:ad7ca9c2b748 | 119:626a956f3811 |
---|---|
51 #cat $PWD/files/*/6_* > $PWD/junction.txt | 51 #cat $PWD/files/*/6_* > $PWD/junction.txt |
52 #cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt | 52 #cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt |
53 #cat $PWD/files/*/8_* > $PWD/mutationstats.txt | 53 #cat $PWD/files/*/8_* > $PWD/mutationstats.txt |
54 #cat $PWD/files/*/10_* > $PWD/hotspots.txt | 54 #cat $PWD/files/*/10_* > $PWD/hotspots.txt |
55 | 55 |
56 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin" | 56 if [[ ${#BLASTN_DIR} -ge 5 ]] ; then |
57 | 57 echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}" |
58 echo "${BLASTN_DIR}" | 58 else |
| 59 BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin" |
| 60 echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}" |
| 61 fi |
59 | 62 |
60 echo "---------------- identification ($method) ----------------" | 63 echo "---------------- identification ($method) ----------------" |
61 echo "---------------- identification ($method) ----------------<br />" >> $log | 64 echo "---------------- identification ($method) ----------------<br />" >> $log |
62 | 65 |
63 if [[ "${method}" == "custom" ]] ; then | 66 if [[ "${method}" == "custom" ]] ; then |
64 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt | 67 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt |
65 else | 68 else |
66 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) | 69 #ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) |
67 ID_index=$((ID_index+1)) | 70 #ID_index=$((ID_index+1)) |
68 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l) | 71 #sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l) |
69 sequence_index=$((sequence_index+1)) | 72 #sequence_index=$((sequence_index+1)) |
70 | 73 |
71 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.tmp | 74 #echo "${ID_index}, ${sequence_index}" |
72 | 75 |
73 cat $PWD/sequences.tmp | grep -B1 -vE "^$" sequences.fasta #filter out empty sequences | 76 #cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.tmp |
74 | 77 #cat $PWD/summary.txt | tail -n+2 | awk -v id="${ID_index}" -v seq="${sequence_index}" 'BEGIN{FS="\t"} if(NF>10 && length($seq) > 0) {print ">" $id "\n" $seq} {}' > $PWD/sequences.fasta |
75 rm $PWD/sequences.tmp | 78 |
| 79 #cat $PWD/sequences.tmp | grep -B1 -vE ">.*|^$" | grep -v "^\-\-$" > sequences.fasta #filter out empty sequences |
| 80 |
| 81 echo "---------------- summary_to_fasta.py ----------------" |
| 82 echo "---------------- summary_to_fasta.py ----------------<br />" >> $log |
| 83 |
| 84 python $dir/summary_to_fasta.py --input $PWD/summary.txt --fasta $PWD/sequences.fasta |
| 85 |
| 86 #rm $PWD/sequences.tmp |
76 | 87 |
77 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt | 88 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt |
78 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt | 89 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt |
79 fi | 90 fi |
80 | 91 |