comparison wrapper.sh @ 89:480fdd383fdb draft

Uploaded
author davidvanzessen
date Tue, 31 May 2016 08:30:50 -0400
parents 07f7da724a77
children f0e8dac22c6e
comparison
equal deleted inserted replaced
88:d57c624a9aa9 89:480fdd383fdb
19 echo "---------------- read parameters ----------------" 19 echo "---------------- read parameters ----------------"
20 echo "---------------- read parameters ----------------<br />" > $output 20 echo "---------------- read parameters ----------------<br />" > $output
21 21
22 echo "unpacking IMGT file" 22 echo "unpacking IMGT file"
23 23
24
25
24 type="`file $input`" 26 type="`file $input`"
25 if [[ "$type" == *"Zip archive"* ]] ; then 27 if [[ "$type" == *"Zip archive"* ]] ; then
26 echo "Zip archive" 28 echo "Zip archive"
27 echo "unzip $input -d $PWD/files/" 29 echo "unzip $input -d $PWD/files/"
28 unzip $input -d $PWD/files/ 30 unzip $input -d $PWD/files/
39 cat `find $PWD/files/ -name "6_*"` > $PWD/junction.txt 41 cat `find $PWD/files/ -name "6_*"` > $PWD/junction.txt
40 cat `find $PWD/files/ -name "7_*"` > $PWD/mutationanalysis.txt 42 cat `find $PWD/files/ -name "7_*"` > $PWD/mutationanalysis.txt
41 cat `find $PWD/files/ -name "8_*"` > $PWD/mutationstats.txt 43 cat `find $PWD/files/ -name "8_*"` > $PWD/mutationstats.txt
42 cat `find $PWD/files/ -name "10_*"` > $PWD/hotspots.txt 44 cat `find $PWD/files/ -name "10_*"` > $PWD/hotspots.txt
43 45
44
45
46 #cat $PWD/files/*/1_* > $PWD/summary.txt 46 #cat $PWD/files/*/1_* > $PWD/summary.txt
47 #cat $PWD/files/*/3_* > $PWD/sequences.txt 47 #cat $PWD/files/*/3_* > $PWD/sequences.txt
48 #cat $PWD/files/*/5_* > $PWD/aa.txt 48 #cat $PWD/files/*/5_* > $PWD/aa.txt
49 #cat $PWD/files/*/6_* > $PWD/junction.txt 49 #cat $PWD/files/*/6_* > $PWD/junction.txt
50 #cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt 50 #cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
54 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin" 54 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
55 55
56 echo "${BLASTN_DIR}" 56 echo "${BLASTN_DIR}"
57 57
58 echo "identification ($method)" 58 echo "identification ($method)"
59 echo "identification ($method)<br />" >> $output 59 echo "---------------- identification ($method) ----------------"
60 60 echo "---------------- identification ($method) ----------------<br />" >> $output
61 echo "blast or custom"
62 61
63 if [[ "${method}" == "custom" ]] ; then 62 if [[ "${method}" == "custom" ]] ; then
64 echo "custom"
65 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt 63 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
66 else 64 else
67 echo "blast"
68 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) 65 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
69 ID_index=$((ID_index+1)) 66 ID_index=$((ID_index+1))
70 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l) 67 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
71 sequence_index=$((sequence_index+1)) 68 sequence_index=$((sequence_index+1))
72 69
73 echo "$ID_index ${sequence_index}"
74
75 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta 70 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
76 71
77 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt 72 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
78 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt 73 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
79 fi 74 fi
108 echo "---------------- sequence_overview.r ----------------" 103 echo "---------------- sequence_overview.r ----------------"
109 104
110 mkdir $outdir/sequence_overview 105 mkdir $outdir/sequence_overview
111 106
112 Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 107 Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1
108 #Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1
113 109
114 echo "<table border='1'>" > $outdir/base_overview.html 110 echo "<table border='1'>" > $outdir/base_overview.html
115 111
116 while read ID class seq A C G T 112 while read ID class seq A C G T
117 do 113 do
151 done 147 done
152 148
153 tmp=`cat $outdir/unmatched_${func}_n.txt` 149 tmp=`cat $outdir/unmatched_${func}_n.txt`
154 echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th>" >> $output 150 echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th>" >> $output
155 tmp=`cat $outdir/all_${func}_n.txt` 151 tmp=`cat $outdir/all_${func}_n.txt`
156 echo "<th><a href='matched_${func}_all.txt'>all (N = $tmp)</a></th>" >> $output 152 echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> $output
157 153
158 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz 154 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz
159 do 155 do
160 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh 156 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
161 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output 157 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output