Mercurial > repos > davidvanzessen > mutation_analysis
comparison seqlogo @ 2:2f4298673519 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Wed, 10 Sep 2014 10:33:29 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:856b5b718d21 | 2:2f4298673519 |
|---|---|
| 1 #!/usr/bin/perl -w | |
| 2 | |
| 3 =head1 NAME | |
| 4 | |
| 5 seqlogo - runs the logo creation script | |
| 6 | |
| 7 =head1 SYNOPSIS | |
| 8 | |
| 9 seqlogo [OPTION]...-f [FILENAME] | |
| 10 | |
| 11 =head1 DESCRIPTION | |
| 12 | |
| 13 Creates a logo for the given input filename. | |
| 14 | |
| 15 Available options: | |
| 16 -B <bar bits> Number of bits in bar (real between 0, 1) | |
| 17 -T <tic bits> Number of bits between tic marks | |
| 18 -C <chars per line> Number of characters per line of logo | |
| 19 -d <box shrink factor> Shrink factor of characters if option c is toggled | |
| 20 -E <error bar fraction> Fraction of error bar to show (real between 0, 1 ) | |
| 21 -f <input file> Input filename | |
| 22 -F <format> Format of output (EPS, GIF, PDF, PNG), - for STDOUT | |
| 23 -h <logo height> Height of output logo (real > 0) | |
| 24 -k <kind of data> 0 for amino acid, 1 for nucleic acid ; if not | |
| 25 defined, a 90% certainty method is used to | |
| 26 determine whether the input data is amino acid or | |
| 27 nucleic acid | |
| 28 -l <sequence lower bound> Lower bound of sequence (integer) | |
| 29 -m <sequence upper bound> Upper bound of sequence (integer) | |
| 30 -o <output file> Name of output file | |
| 31 -s <sequence start> Sequence start number, defaults to 1 (int) | |
| 32 -t <titletext> Text of title, enclosed in "" if more than one word | |
| 33 -w <logo width> Width of output logo | |
| 34 -x <x-axis label> Label for x-axis | |
| 35 -y <y-axis label> Label for y-axis | |
| 36 | |
| 37 Available toggles (no values associated) | |
| 38 -a Toggle antialiasing | |
| 39 -b Toggle bar ends | |
| 40 -c Toggle color | |
| 41 -e Toggle error bar | |
| 42 -M Toggle small sample correction | |
| 43 -O Toggle outlining of characters | |
| 44 -n Toggle numbering of x-axis | |
| 45 -S Toggle stretching of logos to entire length | |
| 46 -X Toggle boxing of characters | |
| 47 -Y Toggle y-axis | |
| 48 | |
| 49 =head1 EXAMPLE | |
| 50 | |
| 51 The following command takes as input "input.fasta" and returns the logo in the | |
| 52 form "logo.eps". Antialiasing, bar ends, color, small sample correction, | |
| 53 numbering of x-axis, and y-axis labelling are turned on: | |
| 54 | |
| 55 seqlogo -f input.fasta -F EPS -o logo.eps -abcMnY | |
| 56 | |
| 57 =cut | |
| 58 | |
| 59 use vars qw($PATH); | |
| 60 | |
| 61 BEGIN { | |
| 62 | |
| 63 use FindBin qw($Bin); | |
| 64 use lib "$Bin"; | |
| 65 | |
| 66 $PATH = $Bin; | |
| 67 | |
| 68 ## $PATH = "/h/gary/Seqlogo/Code/"; | |
| 69 # $PATH = "/n/weblogo/home/httpd/weblogo/pub/beta/Seqlogo/Code"; | |
| 70 # unshift(@INC, $PATH); | |
| 71 } | |
| 72 | |
| 73 use logo; | |
| 74 use template; | |
| 75 use Getopt::Std; | |
| 76 use FileHandle; | |
| 77 | |
| 78 | |
| 79 my $opts; | |
| 80 $opts = | |
| 81 $opt_a || # antialiasing | |
| 82 $opt_b || # bar ends (0 or 1) | |
| 83 $opt_c || # color | |
| 84 $opt_e || # show error bar (0 or 1) | |
| 85 $opt_n || # numbering (0 or 1) | |
| 86 $opt_M || # small sample correction (0 or 1) | |
| 87 $opt_O || # outline (0 or 1) | |
| 88 $opt_S || # stretch | |
| 89 $opt_X || # box (0 for no box, 1 for box) | |
| 90 $opt_Y || # y axis | |
| 91 | |
| 92 $opt_B || # bar bits (real) | |
| 93 $opt_T || # tics bits (real) | |
| 94 $opt_C || # chars per line | |
| 95 $opt_d || # box shrinking factor (<1) | |
| 96 $opt_E || # error bar fraction (real) | |
| 97 $opt_f || # input filename | |
| 98 $opt_F || # format (PNG, EPS, PDF, GIF) | |
| 99 $opt_h || # logo height (cm) | |
| 100 $opt_k || # 0 = amino acid, 1 = nucleic acid | |
| 101 $opt_l || # lower bound of sequence to put in logo | |
| 102 $opt_m || # max bound of sequence to put in logo | |
| 103 $opt_o || # output file | |
| 104 $opt_s || # start number for very beginning of sequence | |
| 105 $opt_t || # title text (string) | |
| 106 $opt_w || # logo width (cm) | |
| 107 $opt_x || # x axis label | |
| 108 $opt_y || # y axis label | |
| 109 $opts; | |
| 110 | |
| 111 ################################################################################ | |
| 112 ##### USAGE ##### | |
| 113 ################################################################################ | |
| 114 | |
| 115 sub usage { | |
| 116 my $usage = <<END | |
| 117 | |
| 118 usage: seqlogo -f <input filename> [OPTIONs with values] | |
| 119 Creates a logo for the given input filename. | |
| 120 | |
| 121 Available options: | |
| 122 -B <bar bits> Number of bits in bar (real # > 0) | |
| 123 -T <tic bits> Number of bits between tic marks | |
| 124 -C <chars per line> Number of characters per line of logo | |
| 125 -d <box shrink factor> Shrink factor of characters if option c is toggled | |
| 126 -E <error bar fraction> Fraction of error bar to show (real # > 0) | |
| 127 -f <input filename> Input filename | |
| 128 -F <format> Format of output (EPS, GIF, PDF, PNG), - for STDOUT | |
| 129 -h <logo height> Height of output logo (real # > 0) | |
| 130 -k <kind of data> 0 for amino acid, 1 for nucleic acid | |
| 131 -l <sequence lower bound> Lower bound of sequence (integer) | |
| 132 -m <sequence upper bound> Upper bound of sequence (integer) | |
| 133 -o <output file> Name of output file | |
| 134 -s <sequence start> Sequence start number, defaults to 1 (int) | |
| 135 -t <titletext> Text of title, enclosed in "" if more than one word | |
| 136 -w <logo width> Width of output logo | |
| 137 -x <x-axis label> Label for x-axis | |
| 138 -y <y-axis label> Label for y-axis | |
| 139 | |
| 140 Available toggles (no values associated) bOenc | |
| 141 -a Toggle antialiasing | |
| 142 -b Toggle bar ends | |
| 143 -c Toggle color | |
| 144 -e Toggle error bar | |
| 145 -M Toggle small sample correction | |
| 146 -O Toggle outlining of characters | |
| 147 -p Toggle fineprint | |
| 148 -n Toggle numbering of x-axis | |
| 149 -S Toggle stretching of logos to entire length | |
| 150 -X Toggle boxing of characters | |
| 151 -Y Toggle y-axis | |
| 152 | |
| 153 END | |
| 154 ; | |
| 155 | |
| 156 return $usage; | |
| 157 } | |
| 158 | |
| 159 ################################################################################ | |
| 160 ##### MAIN FUNCTION ##### | |
| 161 ################################################################################ | |
| 162 | |
| 163 # arguments : $_[0] : file name | |
| 164 MAIN: { | |
| 165 init(); | |
| 166 | |
| 167 # feed data from file to make height data array reference | |
| 168 my @input = <INPUTFILE>; | |
| 169 close (INPUTFILE); | |
| 170 my %heightparams = ( | |
| 171 smallsampletoggle => $opt_M, | |
| 172 input_kind => $opt_k, | |
| 173 stretch => $opt_S | |
| 174 ); | |
| 175 | |
| 176 my ($heightdata_r, $desc_r, $kind, $goodlength, $badline, $validformat) = | |
| 177 logo::getHeightData(\@input, \%heightparams); | |
| 178 | |
| 179 # check for errors | |
| 180 if ((defined $validformat) && ($validformat == 1)) { | |
| 181 die("Error: Invalid input format does not conform to FASTA, " . | |
| 182 "CLUSTAL, or Flat.\n"); | |
| 183 } | |
| 184 if (!$goodlength) { | |
| 185 die("Error: Number of characters in each logo line is not " . | |
| 186 "consistent, starting at: ", $badline, "\n"); | |
| 187 } | |
| 188 | |
| 189 my %input = ( | |
| 190 LOGO_HEIGHT => $opt_h, | |
| 191 LOGO_WIDTH => $opt_w, | |
| 192 COLORSCHEME => ($opt_c) ? "DEFAULT" : "BW", | |
| 193 | |
| 194 LOGOSTART => $opt_l, | |
| 195 LOGOEND => $opt_m, | |
| 196 START_NUM => $opt_s, | |
| 197 | |
| 198 TITLETEXT => $opt_t, | |
| 199 YAXIS_LABEL => $opt_y, | |
| 200 XAXIS_LABEL => $opt_x, | |
| 201 | |
| 202 BOXSHRINK => $opt_d, | |
| 203 CHARSPERLINE => $opt_C, | |
| 204 BARBITS => $opt_B, | |
| 205 TICBITS => $opt_T, | |
| 206 RES => "96", | |
| 207 "FORMAT" => (uc $opt_F), | |
| 208 | |
| 209 # toggles | |
| 210 ANTIALIAS => $opt_a, | |
| 211 ERRBAR => $opt_e, | |
| 212 FINEPRINT => $opt_p, | |
| 213 NUMBERING => $opt_n, | |
| 214 OUTLINE => $opt_O, | |
| 215 SHOWENDS => $opt_b, | |
| 216 SHOWINGBOX => $opt_X, | |
| 217 YAXIS => $opt_Y | |
| 218 ); | |
| 219 | |
| 220 template::create_template(\%input, $kind, $desc_r, $heightdata_r, $opt_o, $PATH); | |
| 221 } | |
| 222 | |
| 223 | |
| 224 ################################################################################ | |
| 225 ##### FUNCTINOS FOR INIT ##### | |
| 226 ################################################################################ | |
| 227 | |
| 228 # all ints | |
| 229 sub isInt { | |
| 230 return ($_[0] =~ /^[\+\-]?\d+$/) ? 1 : 0; | |
| 231 } | |
| 232 | |
| 233 # all reals | |
| 234 sub isReal { | |
| 235 return ($_[0] =~ /^[\+\-]?\d*.\d*?$/) ? 1 : 0; | |
| 236 } | |
| 237 | |
| 238 sub isZeroOrOne { | |
| 239 return ($_[0] == 0 || $_[0] == 1) ? 1 : 0; | |
| 240 } | |
| 241 | |
| 242 sub init { | |
| 243 | |
| 244 # if (not defined $PATH) { | |
| 245 # die ("PATH must be defined\n"); | |
| 246 # } elsif (not -e $PATH) { | |
| 247 # die ("PATH ($PATH) must exist\n"); | |
| 248 # } elsif (not -d $PATH) { | |
| 249 # die ("PATH ($PATH) must be a directory\n"); | |
| 250 # } | |
| 251 | |
| 252 &getopts('T:B:C:d:E:f:F:h:k:l:m:o:s:t:w:x:y:abcenMOpSXY'); | |
| 253 | |
| 254 if (defined $opt_B && | |
| 255 (!isReal($opt_B) || $opt_B < 0) ) { | |
| 256 printf("\noption B must be a positive real, but is $opt_B, $!\n"); | |
| 257 die &usage(); | |
| 258 } | |
| 259 if (defined $opt_d && | |
| 260 ( !isReal($opt_d) || $opt_d < 0 || $opt_d > 1) ) { | |
| 261 print("\noption d must be a real between 0 and 1, but is $opt_d, $!\n"); | |
| 262 die &usage(); | |
| 263 } | |
| 264 if (defined $opt_E && | |
| 265 (!isReal($opt_E) || $opt_E < 0 || $opt_E > 1) ) { | |
| 266 print("\noption E must be a real between 0 and 1, but is $opt_E, $!\n"); | |
| 267 die &usage(); | |
| 268 } | |
| 269 if (defined $opt_f) { | |
| 270 open (INPUTFILE, "$opt_f") or die "Couldn't open input filename $opt_f: $!\n"; | |
| 271 } else { | |
| 272 print("\ninput file not specified, terminating...\n"); | |
| 273 die &usage(); | |
| 274 } | |
| 275 if (defined $opt_h && | |
| 276 (!isReal($opt_h) || $opt_h < 0) ) { | |
| 277 print("\noption h must be a positive real, but is $opt_h, $!\n"); | |
| 278 die &usage(); | |
| 279 } | |
| 280 if (defined $opt_w && | |
| 281 (!isReal($opt_w) || $opt_w < 0) ) { | |
| 282 print("\noption w must be a positive real, but is $opt_w, $!\n"); | |
| 283 die &usage(); | |
| 284 } | |
| 285 if (defined $opt_k && | |
| 286 (!isZeroOrOne($opt_k)) ) { | |
| 287 print("\noption k must be 0 or 1, but is $opt_k, $!\n"); | |
| 288 die &usage(); | |
| 289 } | |
| 290 | |
| 291 #toggles | |
| 292 if (!defined $opt_a) { | |
| 293 $opt_a = 0; | |
| 294 } | |
| 295 if (!defined $opt_b) { | |
| 296 $opt_b = 0; | |
| 297 } | |
| 298 if (!defined $opt_c) { | |
| 299 $opt_c = 0; | |
| 300 } | |
| 301 if (!defined $opt_e) { | |
| 302 $opt_e = 0; | |
| 303 } | |
| 304 if (!defined $opt_n) { | |
| 305 $opt_n = 0; | |
| 306 } | |
| 307 if (!defined $opt_M) { | |
| 308 $opt_M = 0; | |
| 309 } | |
| 310 if (!defined $opt_O) { | |
| 311 $opt_O = 0; | |
| 312 } | |
| 313 if (!defined $opt_p) { | |
| 314 $opt_p = 0; | |
| 315 } | |
| 316 if (!defined $opt_S) { | |
| 317 $opt_S = 0; | |
| 318 } | |
| 319 if (!defined $opt_X) { | |
| 320 $opt_X = 0; | |
| 321 }; | |
| 322 if (!defined $opt_Y) { | |
| 323 $opt_Y = 0; | |
| 324 }; | |
| 325 | |
| 326 if (!defined $opt_F) { | |
| 327 $opt_F = "EPS"; # default to EPS | |
| 328 } | |
| 329 if (!defined $opt_o) { | |
| 330 $opt_o = "-"; # for standard out | |
| 331 } else { | |
| 332 # $opt_o =~ s/\.\S*$//; # remove extension if there is one | |
| 333 $opt_o .= "." . (lc $opt_F); # make file name | |
| 334 } | |
| 335 | |
| 336 if (defined $opt_C && | |
| 337 (!isInt($opt_C) || $opt_C < 0) ) { | |
| 338 printf("\noption C must be a postive integer, but is $opt_C, $!\n"); | |
| 339 die &usage(); | |
| 340 } | |
| 341 | |
| 342 if (defined $opt_l && !isInt($opt_l)) { | |
| 343 printf("\noption l must be an integer, but is $opt_l, $!\n"); | |
| 344 die &usage(); | |
| 345 } | |
| 346 | |
| 347 if (defined $opt_m && !isInt($opt_m)) { | |
| 348 printf("\noption m must be an integer, but is $opt_m, $!\n"); | |
| 349 die &usage(); | |
| 350 } | |
| 351 | |
| 352 if (defined $opt_s && !isInt($opt_s)) { | |
| 353 printf("\noption s must be an integer, but is $opt_s, $!\n"); | |
| 354 die &usage(); | |
| 355 } | |
| 356 } |
