Mercurial > repos > rnateam > rnacode
comparison processMAF.sh @ 1:0554fa091710 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/rnacode commit b6d3800e260cdfcbe99e5f056344c3df101dad00
| author | rnateam |
|---|---|
| date | Fri, 13 Apr 2018 07:37:15 -0400 |
| parents | |
| children | f9a7dc68d6cf |
comparison
equal
deleted
inserted
replaced
| 0:95340f016498 | 1:0554fa091710 |
|---|---|
| 1 #!/usr/bin/env bash | |
| 2 # RNAcode sometimes fails because of bugs. Since the manual suggests | |
| 3 # to call RNAcode on splitted alignments it is feasible to run | |
| 4 # RNAcode separately on the parts. This is implemented here. Command | |
| 5 # line parameters just passed on to RNAcode. | |
| 6 # | |
| 7 # - the script ensures that the region ids are continuous (otherwise | |
| 8 # the results for each block would start with 0) | |
| 9 # - also eps file names are corrected accordingly | |
| 10 # if RNAcode fails for one part it just outputs the part (for bug reporting) | |
| 11 # and continues | |
| 12 | |
| 13 # for splitting the alignment you can use breakMAF.pl from the RNAcode | |
| 14 # github (it seems to be absent from the 0.3 release) and feed the output | |
| 15 # with the desired RNAcode parameters into this shell script, e.g.: | |
| 16 # | |
| 17 # breakMAF.pl < chrM.maf | ./processMAF.sh --tabular --eps --eps-dir eps2/ | |
| 18 | |
| 19 # parse the command line options | |
| 20 # - removes --outfile, --help, --version, and the input file from the arguments | |
| 21 # - outfile and infile are stored in variables | |
| 22 declare -a args=() | |
| 23 while [[ $# -gt 0 ]] | |
| 24 do | |
| 25 key="$1" | |
| 26 case $key in | |
| 27 -d|--eps-dir) | |
| 28 epsdir=$2 | |
| 29 args+=($1 $2) | |
| 30 shift # past argument | |
| 31 shift # past value | |
| 32 ;; | |
| 33 -e|--eps) | |
| 34 eps=1 | |
| 35 args+=($1) | |
| 36 shift # past argument | |
| 37 ;; | |
| 38 -g|--gtf) | |
| 39 gtf=1 | |
| 40 args+=($1) | |
| 41 shift # past argument | |
| 42 ;; | |
| 43 -t|--tabular) | |
| 44 tabular=1 | |
| 45 args+=($1) | |
| 46 shift # past argument | |
| 47 ;; | |
| 48 -o|--outfile) | |
| 49 outfile=$2 | |
| 50 shift # past argument | |
| 51 shift # past value | |
| 52 ;; | |
| 53 -b|--best-only|-r|--best-region|-s|--stop-early) | |
| 54 args+=($1) | |
| 55 shift # past argument | |
| 56 ;; | |
| 57 -n|--num-samples|-p|--cutoff|-c|--pars|-i|--eps-cutoff) | |
| 58 args+=($1 $2) | |
| 59 shift # past argument | |
| 60 shift # past value | |
| 61 ;; | |
| 62 -h|--help|-v|--version) | |
| 63 shift # past argument | |
| 64 ;; | |
| 65 *) # unknown option | |
| 66 file=$1 | |
| 67 shift # past argument | |
| 68 ;; | |
| 69 esac | |
| 70 done | |
| 71 | |
| 72 # fix output (renumber blocks) | |
| 73 # and move eps files (if present) to tmpdir | |
| 74 function fix_output { | |
| 75 if [[ -z "$last" ]]; then | |
| 76 last=0 | |
| 77 fi | |
| 78 while read line | |
| 79 do | |
| 80 if [[ -z "$gtf" ]]; then | |
| 81 i=`echo "$line" | sed 's/^\([[:digit:]]\+\)[[:space:]].*/\1/'` | |
| 82 else | |
| 83 i=`echo $line | sed 's/.*Gene\([[:digit:]]\+\).*/\1/'` | |
| 84 fi | |
| 85 j=`echo "$i+$last" | bc` | |
| 86 if [[ -z "$gtf" ]]; then | |
| 87 echo "$line" | sed "s/^\([[:digit:]]\+\)\([[:space:]].*\)/$j\2/" | |
| 88 else | |
| 89 echo "$line" | sed "s/^\(.*\)Gene[0-9]\+\(\".*\)$/\1Gene$j\2/" | |
| 90 fi | |
| 91 #echo $line | awk -v n=$j '{printf("%d\t", n); for(i=2; i<=NF; i++){printf("%s", $(i)); if(i==NF){printf("\n")}else{printf("\t")}}}' | |
| 92 if [[ ! -z "$eps" && -f ${epsdir:-eps}/hss-$i.eps ]]; then | |
| 93 mv ${epsdir:-eps}/hss-$i.eps $tmpd/hss-$j.eps | |
| 94 fi | |
| 95 done | |
| 96 if [[ ! -z "$j" ]]; then | |
| 97 last=`echo "$j+1" | bc` | |
| 98 unset j | |
| 99 fi | |
| 100 } | |
| 101 | |
| 102 # run RNAcode for $tempfile if >= 3 sequences | |
| 103 function run_rnacode { | |
| 104 >&2 echo -e "processing " `cat ${tmpif} | grep ^s | head -n 1 | cut -d" " -f1-6` | |
| 105 # >&2 echo "with RNAcode" $@ | |
| 106 nl=`cat ${tmpif} | grep "^s" | wc -l` | |
| 107 if [[ "$nl" -ge "3" ]]; then | |
| 108 # - filter the outfile for lines containing the ref and redirect everything to stderr | |
| 109 # https://github.com/wash/rnacode/issues/9 | |
| 110 # - we can not pipe stdout | ... | fix_output since then $last can not be used as global variable | |
| 111 if [[ ! -z "$gtf" ]]; then | |
| 112 field=1 | |
| 113 elif [[ ! -z "$tabular" ]]; then | |
| 114 field=7 | |
| 115 else | |
| 116 field=6 | |
| 117 fi | |
| 118 RNAcode $@ | awk -v ref=$ref -v field=$field '{if($(field)==ref){print $0}else{$0 > "/dev/stderr"}}' > ${tmpof} | |
| 119 | |
| 120 if [[ "$?" != "0" ]]; then | |
| 121 ef=$(mktemp -u -p '.') | |
| 122 cat ${tmpif} > ${ef}.maf | |
| 123 >&2 echo "RNAcode failed for the alignmentblock \""`cat ${tmpif} | grep $ref | cut -d" " -f 1-6`"\" (${ef}.maf)" | |
| 124 fi | |
| 125 fix_output < $tmpof | |
| 126 echo -n > ${tmpof} | |
| 127 else | |
| 128 >&2 echo "less than 3 sequences in the alignment block \""`cat ${tmpif} | grep $ref | cut -d" " -f 1-6`"\"" | |
| 129 fi | |
| 130 } | |
| 131 | |
| 132 ref="" | |
| 133 last=0 | |
| 134 | |
| 135 if [[ ! -z "$tabular" ]]; then | |
| 136 echo -e "HSS #\tFrame\tLength\tFrom\tTo\tName\tStart\tEnd\tScore\tP" >> ${outfile:-/dev/stdout} | |
| 137 fi | |
| 138 | |
| 139 tmpif=$(mktemp -p '.') | |
| 140 tmpof=$(mktemp -p '.') | |
| 141 tmpd=$(mktemp -d -p '.') | |
| 142 | |
| 143 # process lines of the alignment | |
| 144 # - save lines to tmpif | |
| 145 # - empty lines: process tmpif (ie. last alignment block) with RNAcode, clear tmpif | |
| 146 # - on the go the name of the reference species is determined from the 1st line | |
| 147 # of the alignment this is used then for filtering the RNAcode output | |
| 148 # in case of gtf output only the chromosome is printed, ie only chr1 instead of dm6.chr1 | |
| 149 while read line | |
| 150 do | |
| 151 if [[ "$line" =~ ^# ]]; then | |
| 152 echo -n > ${tmpif} | |
| 153 elif [[ "$line" =~ ^$ ]]; then | |
| 154 run_rnacode ${args[@]} ${tmpif} | |
| 155 # >> ${outfile:-/dev/stdout} | |
| 156 echo -n > ${tmpif} | |
| 157 else | |
| 158 if [[ -z $ref && "$line" =~ ^s ]]; then | |
| 159 if [[ -z "$gtf" ]]; then | |
| 160 ref=`echo $line | cut -d" " -f 2` | |
| 161 else | |
| 162 ref=`echo $line | sed 's/\./ /g' | cut -d" " -f 3` | |
| 163 fi | |
| 164 fi | |
| 165 echo $line >> ${tmpif} | |
| 166 fi | |
| 167 done < ${file:-/dev/stdin} | |
| 168 # if there is something left -> process it | |
| 169 if [[ "`cat ${tmpif} | wc -l`" -gt "0" ]]; then | |
| 170 run_rnacode ${args[@]} ${tmpif} | |
| 171 # >> ${outfile:-/dev/stdout} | |
| 172 fi | |
| 173 | |
| 174 if [[ ! -z "$eps" ]]; then | |
| 175 mv ${tmpd}/*eps ${epsdir:-eps}/ | |
| 176 fi | |
| 177 | |
| 178 rm ${tmpif} | |
| 179 rm ${tmpof} | |
| 180 rmdir ${tmpd} |
