Mercurial > repos > davidvanzessen > mutation_analysis
comparison wrapper.sh @ 89:480fdd383fdb draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 31 May 2016 08:30:50 -0400 |
parents | 07f7da724a77 |
children | f0e8dac22c6e |
comparison
equal
deleted
inserted
replaced
88:d57c624a9aa9 | 89:480fdd383fdb |
---|---|
19 echo "---------------- read parameters ----------------" | 19 echo "---------------- read parameters ----------------" |
20 echo "---------------- read parameters ----------------<br />" > $output | 20 echo "---------------- read parameters ----------------<br />" > $output |
21 | 21 |
22 echo "unpacking IMGT file" | 22 echo "unpacking IMGT file" |
23 | 23 |
24 | |
25 | |
24 type="`file $input`" | 26 type="`file $input`" |
25 if [[ "$type" == *"Zip archive"* ]] ; then | 27 if [[ "$type" == *"Zip archive"* ]] ; then |
26 echo "Zip archive" | 28 echo "Zip archive" |
27 echo "unzip $input -d $PWD/files/" | 29 echo "unzip $input -d $PWD/files/" |
28 unzip $input -d $PWD/files/ | 30 unzip $input -d $PWD/files/ |
39 cat `find $PWD/files/ -name "6_*"` > $PWD/junction.txt | 41 cat `find $PWD/files/ -name "6_*"` > $PWD/junction.txt |
40 cat `find $PWD/files/ -name "7_*"` > $PWD/mutationanalysis.txt | 42 cat `find $PWD/files/ -name "7_*"` > $PWD/mutationanalysis.txt |
41 cat `find $PWD/files/ -name "8_*"` > $PWD/mutationstats.txt | 43 cat `find $PWD/files/ -name "8_*"` > $PWD/mutationstats.txt |
42 cat `find $PWD/files/ -name "10_*"` > $PWD/hotspots.txt | 44 cat `find $PWD/files/ -name "10_*"` > $PWD/hotspots.txt |
43 | 45 |
44 | |
45 | |
46 #cat $PWD/files/*/1_* > $PWD/summary.txt | 46 #cat $PWD/files/*/1_* > $PWD/summary.txt |
47 #cat $PWD/files/*/3_* > $PWD/sequences.txt | 47 #cat $PWD/files/*/3_* > $PWD/sequences.txt |
48 #cat $PWD/files/*/5_* > $PWD/aa.txt | 48 #cat $PWD/files/*/5_* > $PWD/aa.txt |
49 #cat $PWD/files/*/6_* > $PWD/junction.txt | 49 #cat $PWD/files/*/6_* > $PWD/junction.txt |
50 #cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt | 50 #cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt |
54 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin" | 54 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin" |
55 | 55 |
56 echo "${BLASTN_DIR}" | 56 echo "${BLASTN_DIR}" |
57 | 57 |
58 echo "identification ($method)" | 58 echo "identification ($method)" |
59 echo "identification ($method)<br />" >> $output | 59 echo "---------------- identification ($method) ----------------" |
60 | 60 echo "---------------- identification ($method) ----------------<br />" >> $output |
61 echo "blast or custom" | |
62 | 61 |
63 if [[ "${method}" == "custom" ]] ; then | 62 if [[ "${method}" == "custom" ]] ; then |
64 echo "custom" | |
65 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt | 63 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt |
66 else | 64 else |
67 echo "blast" | |
68 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) | 65 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) |
69 ID_index=$((ID_index+1)) | 66 ID_index=$((ID_index+1)) |
70 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l) | 67 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l) |
71 sequence_index=$((sequence_index+1)) | 68 sequence_index=$((sequence_index+1)) |
72 | 69 |
73 echo "$ID_index ${sequence_index}" | |
74 | |
75 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta | 70 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta |
76 | 71 |
77 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt | 72 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt |
78 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt | 73 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt |
79 fi | 74 fi |
108 echo "---------------- sequence_overview.r ----------------" | 103 echo "---------------- sequence_overview.r ----------------" |
109 | 104 |
110 mkdir $outdir/sequence_overview | 105 mkdir $outdir/sequence_overview |
111 | 106 |
112 Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 | 107 Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 |
108 #Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 | |
113 | 109 |
114 echo "<table border='1'>" > $outdir/base_overview.html | 110 echo "<table border='1'>" > $outdir/base_overview.html |
115 | 111 |
116 while read ID class seq A C G T | 112 while read ID class seq A C G T |
117 do | 113 do |
151 done | 147 done |
152 | 148 |
153 tmp=`cat $outdir/unmatched_${func}_n.txt` | 149 tmp=`cat $outdir/unmatched_${func}_n.txt` |
154 echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th>" >> $output | 150 echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th>" >> $output |
155 tmp=`cat $outdir/all_${func}_n.txt` | 151 tmp=`cat $outdir/all_${func}_n.txt` |
156 echo "<th><a href='matched_${func}_all.txt'>all (N = $tmp)</a></th>" >> $output | 152 echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> $output |
157 | 153 |
158 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz | 154 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz |
159 do | 155 do |
160 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh | 156 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh |
161 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output | 157 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output |