Mercurial > repos > davidvanzessen > mutation_analysis
comparison seqlogo @ 2:2f4298673519 draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 10 Sep 2014 10:33:29 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:856b5b718d21 | 2:2f4298673519 |
---|---|
1 #!/usr/bin/perl -w | |
2 | |
3 =head1 NAME | |
4 | |
5 seqlogo - runs the logo creation script | |
6 | |
7 =head1 SYNOPSIS | |
8 | |
9 seqlogo [OPTION]...-f [FILENAME] | |
10 | |
11 =head1 DESCRIPTION | |
12 | |
13 Creates a logo for the given input filename. | |
14 | |
15 Available options: | |
16 -B <bar bits> Number of bits in bar (real >= 0) | |
17 -T <tic bits> Number of bits between tic marks | |
18 -C <chars per line> Number of characters per line of logo | |
19 -d <box shrink factor> Shrink factor of characters if option c is toggled | |
20 -E <error bar fraction> Fraction of error bar to show (real between 0, 1 ) | |
21 -f <input file> Input filename | |
22 -F <format> Format of output (EPS, GIF, PDF, PNG), - for STDOUT | |
23 -h <logo height> Height of output logo (real > 0) | |
24 -k <kind of data> 0 for amino acid, 1 for nucleic acid ; if not | |
25 defined, a 90% certainty method is used to | |
26 determine whether the input data is amino acid or | |
27 nucleic acid | |
28 -l <sequence lower bound> Lower bound of sequence (integer) | |
29 -m <sequence upper bound> Upper bound of sequence (integer) | |
30 -o <output file> Name of output file | |
31 -s <sequence start> Sequence start number, defaults to 1 (int) | |
32 -t <titletext> Text of title, enclosed in "" if more than one word | |
33 -w <logo width> Width of output logo | |
34 -x <x-axis label> Label for x-axis | |
35 -y <y-axis label> Label for y-axis | |
36 | |
37 Available toggles (no values associated) | |
38 -a Toggle antialiasing | |
39 -b Toggle bar ends | |
40 -c Toggle color | |
41 -e Toggle error bar | |
42 -M Toggle small sample correction | |
43 -O Toggle outlining of characters | |
44 -n Toggle numbering of x-axis | |
45 -S Toggle stretching of logos to entire length | |
46 -X Toggle boxing of characters | |
47 -Y Toggle y-axis | |
48 | |
49 =head1 EXAMPLE | |
50 | |
51 The following command takes as input "input.fasta" and returns the logo in the | |
52 form "logo.eps". Antialiasing, bar ends, color, small sample correction, | |
53 numbering of x-axis, and y-axis labelling are turned on: | |
54 | |
55 seqlogo -f input.fasta -F EPS -o logo.eps -abcMnY | |
56 | |
57 =cut | |
58 | |
59 use vars qw($PATH); | |
60 | |
61 BEGIN { | |
62 | |
63 use FindBin qw($Bin); | |
64 use lib "$Bin"; | |
65 | |
66 $PATH = $Bin; | |
67 | |
68 ## $PATH = "/h/gary/Seqlogo/Code/"; | |
69 # $PATH = "/n/weblogo/home/httpd/weblogo/pub/beta/Seqlogo/Code"; | |
70 # unshift(@INC, $PATH); | |
71 } | |
72 | |
73 use logo; | |
74 use template; | |
75 use Getopt::Std; | |
76 use FileHandle; | |
77 | |
78 | |
# getopts() (Getopt::Std) stores every switch it parses in a package global
# named $opt_<letter>.  Declaring those globals here keeps `perl -w` from
# reporting "Name used only once: possible typo" for any of them.  This
# replaces a dummy `$opt_a || $opt_b || ...` expression that existed only to
# mention each variable a second time (and that had forgotten $opt_p).
use vars qw(
    $opt_a $opt_b $opt_c $opt_e $opt_n $opt_M $opt_O $opt_p $opt_S $opt_X $opt_Y
    $opt_B $opt_T $opt_C $opt_d $opt_E $opt_f $opt_F $opt_h $opt_k $opt_l $opt_m
    $opt_o $opt_s $opt_t $opt_w $opt_x $opt_y
);

# Toggles (no value):
#   a antialiasing         b bar ends        c color           e error bar
#   n x-axis numbering     M small sample correction           O outline
#   p fineprint            S stretch         X box             Y y axis
# Options taking a value:
#   B bar bits (real)      T tic bits (real) C chars per line
#   d box shrinking factor E error bar fraction (real)
#   f input filename       F format (PNG, EPS, PDF, GIF)
#   h logo height (cm)     k 0 = amino acid, 1 = nucleic acid
#   l lower bound of sequence to put in logo
#   m max bound of sequence to put in logo
#   o output file          s start number for very beginning of sequence
#   t title text (string)  w logo width (cm)
#   x x axis label         y y axis label

# Left undefined, exactly as the old dummy expression left it; kept only so
# that any code still mentioning $opts sees the same lexical.
my $opts;
110 | |
111 ################################################################################ | |
112 ##### USAGE ##### | |
113 ################################################################################ | |
114 | |
# usage()
#   Builds the command-line help text for seqlogo.
#
#   Takes no arguments.
#   Returns the help text as one multi-line string.  The text starts and ends
#   with a blank line and is newline-terminated, so `die usage()` prints it
#   without Perl appending an "at FILE line N" suffix.
#
#   The toggles heading used to carry a stray "bOenc" token; it has been
#   removed.
sub usage {
    return <<END;

usage: seqlogo -f <input filename> [OPTIONs with values]
Creates a logo for the given input filename.

Available options:
  -B <bar bits>              Number of bits in bar (real # > 0)
  -T <tic bits>              Number of bits between tic marks
  -C <chars per line>        Number of characters per line of logo
  -d <box shrink factor>     Shrink factor of characters if option c is toggled
  -E <error bar fraction>    Fraction of error bar to show (real # > 0)
  -f <input filename>        Input filename
  -F <format>                Format of output (EPS, GIF, PDF, PNG), - for STDOUT
  -h <logo height>           Height of output logo (real # > 0)
  -k <kind of data>          0 for amino acid, 1 for nucleic acid
  -l <sequence lower bound>  Lower bound of sequence (integer)
  -m <sequence upper bound>  Upper bound of sequence (integer)
  -o <output file>           Name of output file
  -s <sequence start>        Sequence start number, defaults to 1 (int)
  -t <titletext>             Text of title, enclosed in "" if more than one word
  -w <logo width>            Width of output logo
  -x <x-axis label>          Label for x-axis
  -y <y-axis label>          Label for y-axis

Available toggles (no values associated)
  -a                         Toggle antialiasing
  -b                         Toggle bar ends
  -c                         Toggle color
  -e                         Toggle error bar
  -M                         Toggle small sample correction
  -O                         Toggle outlining of characters
  -p                         Toggle fineprint
  -n                         Toggle numbering of x-axis
  -S                         Toggle stretching of logos to entire length
  -X                         Toggle boxing of characters
  -Y                         Toggle y-axis

END
}
158 | |
159 ################################################################################ | |
160 ##### MAIN FUNCTION ##### | |
161 ################################################################################ | |
162 | |
# Script entry point.  Takes no arguments of its own: everything comes from
# the command-line switches that init() parses into the $opt_* globals.
# Steps: parse and validate options, read the input file, compute the logo
# height data, then hand everything to the template renderer.
MAIN: {
    init();

    # init() has already opened INPUTFILE; read every line, then release it.
    my @lines = <INPUTFILE>;
    close(INPUTFILE);

    my %height_options = (
        smallsampletoggle => $opt_M,
        input_kind        => $opt_k,
        stretch           => $opt_S,
    );

    my ($heightdata_r, $desc_r, $kind, $goodlength, $badline, $validformat) =
        logo::getHeightData(\@lines, \%height_options);

    # A defined $validformat equal to 1 is treated as "format not recognised".
    if (defined $validformat && $validformat == 1) {
        die("Error: Invalid input format does not conform to FASTA, " .
            "CLUSTAL, or Flat.\n");
    }

    # A false $goodlength means the logo lines differ in length; $badline is
    # reported as the place where the mismatch starts.
    unless ($goodlength) {
        die("Error: Number of characters in each logo line is not " .
            "consistent, starting at: ", $badline, "\n");
    }

    my $colorscheme = $opt_c ? "DEFAULT" : "BW";

    my %template_settings = (
        # geometry and colour
        LOGO_HEIGHT  => $opt_h,
        LOGO_WIDTH   => $opt_w,
        COLORSCHEME  => $colorscheme,

        # sequence window and numbering
        LOGOSTART    => $opt_l,
        LOGOEND      => $opt_m,
        START_NUM    => $opt_s,

        # labels
        TITLETEXT    => $opt_t,
        YAXIS_LABEL  => $opt_y,
        XAXIS_LABEL  => $opt_x,

        # layout
        BOXSHRINK    => $opt_d,
        CHARSPERLINE => $opt_C,
        BARBITS      => $opt_B,
        TICBITS      => $opt_T,
        RES          => "96",
        FORMAT       => uc($opt_F),

        # on/off toggles
        ANTIALIAS    => $opt_a,
        ERRBAR       => $opt_e,
        FINEPRINT    => $opt_p,
        NUMBERING    => $opt_n,
        OUTLINE      => $opt_O,
        SHOWENDS     => $opt_b,
        SHOWINGBOX   => $opt_X,
        YAXIS        => $opt_Y,
    );

    template::create_template(\%template_settings, $kind, $desc_r,
                              $heightdata_r, $opt_o, $PATH);
}
222 | |
223 | |
224 ################################################################################ | |
225 ##### FUNCTIONS FOR INIT ##### | |
226 ################################################################################ | |
227 | |
# isInt($value)
#   Returns 1 when $value is a decimal integer with an optional leading
#   '+' or '-', and 0 otherwise.
#
#   Undefined input returns 0 instead of triggering an "uninitialized value"
#   warning.  The pattern is anchored with \A and \z so that a value with a
#   trailing newline ("5\n") is rejected; a plain `$` anchor would accept it.
sub isInt {
    my ($value) = @_;
    return 0 unless defined $value;
    return ($value =~ /\A[+-]?\d+\z/) ? 1 : 0;
}
232 | |
# isReal($value)
#   Returns 1 when $value is a decimal real number, 0 otherwise.
#
#   Accepted: an optional sign, then digits with an optional fractional part
#   ("12", "12.", "12.5") or a bare fraction (".5"), then an optional
#   exponent ("1e5").
#
#   The earlier pattern left the decimal point unescaped, so `.` matched any
#   single character and strings such as "a", "-" or "1x2" were reported as
#   reals.  Exponent notation is kept because that pattern happened to let
#   "1e5" through and Perl evaluates it numerically.  Undefined input
#   returns 0.
sub isReal {
    my ($value) = @_;
    return 0 unless defined $value;
    return ($value =~ /\A[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?\z/) ? 1 : 0;
}
237 | |
# isZeroOrOne($value)
#   Returns 1 when $value is exactly "0" or "1", and 0 otherwise.
#
#   The check is done on the string form.  A numeric `==` comparison coerces
#   any non-numeric text to 0, so a value such as "foo" would be accepted as
#   a valid 0.  Undefined input returns 0.
sub isZeroOrOne {
    my ($value) = @_;
    return 0 unless defined $value;
    return ($value =~ /\A[01]\z/) ? 1 : 0;
}
241 | |
# _reject_option($message)
#   Prints $message to STDOUT, framed by a leading blank line, then dies with
#   the usage text.  Never returns.
sub _reject_option {
    my ($message) = @_;
    print("\n$message\n");
    die usage();
}

# init()
#   Parses the command line into the $opt_* globals, validates the values,
#   opens the input file on the INPUTFILE handle and fills in defaults.
#
#   Takes no arguments and returns nothing useful.
#   Side effects:
#     - INPUTFILE is opened for reading on the -f file ("-" means STDIN);
#     - every toggle that was not given is set to 0;
#     - $opt_F defaults to "EPS";
#     - $opt_o defaults to "-", otherwise the lower-cased format is appended
#       to it as a file extension.
#   Dies with the usage text when -f is missing or an option value is
#   invalid, and with an open error when the input file cannot be read.
#
#   Changes from the earlier version:
#     - The input file is opened with three-argument open, so a filename
#       cannot smuggle in a mode or a pipe.
#     - Diagnostics go through print, never printf, so a '%' in a
#       user-supplied value is not read as a format directive.
#     - Diagnostics no longer append $!, which held an unrelated errno.
sub init {
    # Unknown switches are reported by getopts itself; parsing then continues.
    getopts('T:B:C:d:E:f:F:h:k:l:m:o:s:t:w:x:y:abcenMOpSXY');

    if (defined $opt_B && (!isReal($opt_B) || $opt_B < 0)) {
        _reject_option("option B must be a positive real, but is $opt_B");
    }
    if (defined $opt_d && (!isReal($opt_d) || $opt_d < 0 || $opt_d > 1)) {
        _reject_option("option d must be a real between 0 and 1, but is $opt_d");
    }
    if (defined $opt_E && (!isReal($opt_E) || $opt_E < 0 || $opt_E > 1)) {
        _reject_option("option E must be a real between 0 and 1, but is $opt_E");
    }

    if (!defined $opt_f) {
        _reject_option("input file not specified, terminating...");
    }
    if ($opt_f eq '-') {
        # Two-argument open treated "-" as standard input; keep that working.
        open(INPUTFILE, '<&', \*STDIN)
            or die "Couldn't open input filename $opt_f: $!\n";
    }
    else {
        open(INPUTFILE, '<', $opt_f)
            or die "Couldn't open input filename $opt_f: $!\n";
    }

    if (defined $opt_h && (!isReal($opt_h) || $opt_h < 0)) {
        _reject_option("option h must be a positive real, but is $opt_h");
    }
    if (defined $opt_w && (!isReal($opt_w) || $opt_w < 0)) {
        _reject_option("option w must be a positive real, but is $opt_w");
    }
    if (defined $opt_k && !isZeroOrOne($opt_k)) {
        _reject_option("option k must be 0 or 1, but is $opt_k");
    }

    # Toggles that were not given on the command line default to 0 (off).
    for my $toggle (\$opt_a, \$opt_b, \$opt_c, \$opt_e, \$opt_n, \$opt_M,
                    \$opt_O, \$opt_p, \$opt_S, \$opt_X, \$opt_Y) {
        $$toggle = 0 unless defined $$toggle;
    }

    $opt_F = "EPS" unless defined $opt_F;    # default to EPS

    if (defined $opt_o) {
        # Any extension the caller supplied is kept; the format is appended.
        $opt_o .= "." . lc($opt_F);
    }
    else {
        $opt_o = "-";                        # for standard out
    }

    if (defined $opt_C && (!isInt($opt_C) || $opt_C < 0)) {
        _reject_option("option C must be a positive integer, but is $opt_C");
    }
    if (defined $opt_l && !isInt($opt_l)) {
        _reject_option("option l must be an integer, but is $opt_l");
    }
    if (defined $opt_m && !isInt($opt_m)) {
        _reject_option("option m must be an integer, but is $opt_m");
    }
    if (defined $opt_s && !isInt($opt_s)) {
        _reject_option("option s must be an integer, but is $opt_s");
    }

    return;
}