2
|
1 #!/usr/bin/perl -w
|
|
2
|
|
3 =head1 NAME
|
|
4
|
|
5 seqlogo - runs the logo creation script
|
|
6
|
|
7 =head1 SYNOPSIS
|
|
8
|
|
9 seqlogo [OPTION]...-f [FILENAME]
|
|
10
|
|
11 =head1 DESCRIPTION
|
|
12
|
|
13 Creates a logo for the given input filename.
|
|
14
|
|
15 Available options:
|
|
16 -B <bar bits> Number of bits in bar (real between 0, 1)
|
|
17 -T <tic bits> Number of bits between tic marks
|
|
18 -C <chars per line> Number of characters per line of logo
|
|
19 -d <box shrink factor> Shrink factor of characters if option c is toggled
|
|
20 -E <error bar fraction> Fraction of error bar to show (real between 0, 1 )
|
|
21 -f <input file> Input filename
|
|
22 -F <format> Format of output (EPS, GIF, PDF, PNG), - for STDOUT
|
|
23 -h <logo height> Height of output logo (real > 0)
|
|
24 -k <kind of data> 0 for amino acid, 1 for nucleic acid ; if not
|
|
25 defined, a 90% certainty method is used to
|
|
26 determine whether the input data is amino acid or
|
|
27 nucleic acid
|
|
28 -l <sequence lower bound> Lower bound of sequence (integer)
|
|
29 -m <sequence upper bound> Upper bound of sequence (integer)
|
|
30 -o <output file> Name of output file
|
|
31 -s <sequence start> Sequence start number, defaults to 1 (int)
|
|
32 -t <titletext> Text of title, enclosed in "" if more than one word
|
|
33 -w <logo width> Width of output logo
|
|
34 -x <x-axis label> Label for x-axis
|
|
35 -y <y-axis label> Label for y-axis
|
|
36
|
|
37 Available toggles (no values associated)
|
|
38 -a Toggle antialiasing
|
|
39 -b Toggle bar ends
|
|
40 -c Toggle color
|
|
41 -e Toggle error bar
|
|
42 -M Toggle small sample correction
|
|
43 -O Toggle outlining of characters
|
|
44 -n Toggle numbering of x-axis
|
|
45 -S Toggle stretching of logos to entire length
|
|
46 -X Toggle boxing of characters
|
|
47 -Y Toggle y-axis
|
|
48
|
|
49 =head1 EXAMPLE
|
|
50
|
|
51 The following command takes as input "input.fasta" and returns the logo in the
|
|
52 form "logo.eps". Antialiasing, bar ends, color, small sample correction,
|
|
53 numbering of x-axis, and y-axis labelling are turned on:
|
|
54
|
|
55 seqlogo -f input.fasta -F EPS -o logo.eps -abcMnY
|
|
56
|
|
57 =cut
|
|
58
|
|
59 use vars qw($PATH);
|
|
60
|
|
61 BEGIN {
|
|
62
|
|
63 use FindBin qw($Bin);
|
|
64 use lib "$Bin";
|
|
65
|
|
66 $PATH = $Bin;
|
|
67
|
|
68 ## $PATH = "/h/gary/Seqlogo/Code/";
|
|
69 # $PATH = "/n/weblogo/home/httpd/weblogo/pub/beta/Seqlogo/Code";
|
|
70 # unshift(@INC, $PATH);
|
|
71 }
|
|
72
|
|
73 use logo;
|
|
74 use template;
|
|
75 use Getopt::Std;
|
|
76 use FileHandle;
|
|
77
|
|
78
|
|
79 my $opts;
|
|
80 $opts =
|
|
81 $opt_a || # antialiasing
|
|
82 $opt_b || # bar ends (0 or 1)
|
|
83 $opt_c || # color
|
|
84 $opt_e || # show error bar (0 or 1)
|
|
85 $opt_n || # numbering (0 or 1)
|
|
86 $opt_M || # small sample correction (0 or 1)
|
|
87 $opt_O || # outline (0 or 1)
|
|
88 $opt_S || # stretch
|
|
89 $opt_X || # box (0 for no box, 1 for box)
|
|
90 $opt_Y || # y axis
|
|
91
|
|
92 $opt_B || # bar bits (real)
|
|
93 $opt_T || # tics bits (real)
|
|
94 $opt_C || # chars per line
|
|
95 $opt_d || # box shrinking factor (<1)
|
|
96 $opt_E || # error bar fraction (real)
|
|
97 $opt_f || # input filename
|
|
98 $opt_F || # format (PNG, EPS, PDF, GIF)
|
|
99 $opt_h || # logo height (cm)
|
|
100 $opt_k || # 0 = amino acid, 1 = nucleic acid
|
|
101 $opt_l || # lower bound of sequence to put in logo
|
|
102 $opt_m || # max bound of sequence to put in logo
|
|
103 $opt_o || # output file
|
|
104 $opt_s || # start number for very beginning of sequence
|
|
105 $opt_t || # title text (string)
|
|
106 $opt_w || # logo width (cm)
|
|
107 $opt_x || # x axis label
|
|
108 $opt_y || # y axis label
|
|
109 $opts;
|
|
110
|
|
111 ################################################################################
|
|
112 ##### USAGE #####
|
|
113 ################################################################################
|
|
114
|
|
115 sub usage {
|
|
116 my $usage = <<END
|
|
117
|
|
118 usage: seqlogo -f <input filename> [OPTIONs with values]
|
|
119 Creates a logo for the given input filename.
|
|
120
|
|
121 Available options:
|
|
122 -B <bar bits> Number of bits in bar (real # > 0)
|
|
123 -T <tic bits> Number of bits between tic marks
|
|
124 -C <chars per line> Number of characters per line of logo
|
|
125 -d <box shrink factor> Shrink factor of characters if option c is toggled
|
|
126 -E <error bar fraction> Fraction of error bar to show (real # > 0)
|
|
127 -f <input filename> Input filename
|
|
128 -F <format> Format of output (EPS, GIF, PDF, PNG), - for STDOUT
|
|
129 -h <logo height> Height of output logo (real # > 0)
|
|
130 -k <kind of data> 0 for amino acid, 1 for nucleic acid
|
|
131 -l <sequence lower bound> Lower bound of sequence (integer)
|
|
132 -m <sequence upper bound> Upper bound of sequence (integer)
|
|
133 -o <output file> Name of output file
|
|
134 -s <sequence start> Sequence start number, defaults to 1 (int)
|
|
135 -t <titletext> Text of title, enclosed in "" if more than one word
|
|
136 -w <logo width> Width of output logo
|
|
137 -x <x-axis label> Label for x-axis
|
|
138 -y <y-axis label> Label for y-axis
|
|
139
|
|
140 Available toggles (no values associated) bOenc
|
|
141 -a Toggle antialiasing
|
|
142 -b Toggle bar ends
|
|
143 -c Toggle color
|
|
144 -e Toggle error bar
|
|
145 -M Toggle small sample correction
|
|
146 -O Toggle outlining of characters
|
|
147 -p Toggle fineprint
|
|
148 -n Toggle numbering of x-axis
|
|
149 -S Toggle stretching of logos to entire length
|
|
150 -X Toggle boxing of characters
|
|
151 -Y Toggle y-axis
|
|
152
|
|
153 END
|
|
154 ;
|
|
155
|
|
156 return $usage;
|
|
157 }
|
|
158
|
|
159 ################################################################################
|
|
160 ##### MAIN FUNCTION #####
|
|
161 ################################################################################
|
|
162
|
|
163 # arguments : $_[0] : file name
|
|
164 MAIN: {
|
|
165 init();
|
|
166
|
|
167 # feed data from file to make height data array reference
|
|
168 my @input = <INPUTFILE>;
|
|
169 close (INPUTFILE);
|
|
170 my %heightparams = (
|
|
171 smallsampletoggle => $opt_M,
|
|
172 input_kind => $opt_k,
|
|
173 stretch => $opt_S
|
|
174 );
|
|
175
|
|
176 my ($heightdata_r, $desc_r, $kind, $goodlength, $badline, $validformat) =
|
|
177 logo::getHeightData(\@input, \%heightparams);
|
|
178
|
|
179 # check for errors
|
|
180 if ((defined $validformat) && ($validformat == 1)) {
|
|
181 die("Error: Invalid input format does not conform to FASTA, " .
|
|
182 "CLUSTAL, or Flat.\n");
|
|
183 }
|
|
184 if (!$goodlength) {
|
|
185 die("Error: Number of characters in each logo line is not " .
|
|
186 "consistent, starting at: ", $badline, "\n");
|
|
187 }
|
|
188
|
|
189 my %input = (
|
|
190 LOGO_HEIGHT => $opt_h,
|
|
191 LOGO_WIDTH => $opt_w,
|
|
192 COLORSCHEME => ($opt_c) ? "DEFAULT" : "BW",
|
|
193
|
|
194 LOGOSTART => $opt_l,
|
|
195 LOGOEND => $opt_m,
|
|
196 START_NUM => $opt_s,
|
|
197
|
|
198 TITLETEXT => $opt_t,
|
|
199 YAXIS_LABEL => $opt_y,
|
|
200 XAXIS_LABEL => $opt_x,
|
|
201
|
|
202 BOXSHRINK => $opt_d,
|
|
203 CHARSPERLINE => $opt_C,
|
|
204 BARBITS => $opt_B,
|
|
205 TICBITS => $opt_T,
|
|
206 RES => "96",
|
|
207 "FORMAT" => (uc $opt_F),
|
|
208
|
|
209 # toggles
|
|
210 ANTIALIAS => $opt_a,
|
|
211 ERRBAR => $opt_e,
|
|
212 FINEPRINT => $opt_p,
|
|
213 NUMBERING => $opt_n,
|
|
214 OUTLINE => $opt_O,
|
|
215 SHOWENDS => $opt_b,
|
|
216 SHOWINGBOX => $opt_X,
|
|
217 YAXIS => $opt_Y
|
|
218 );
|
|
219
|
|
220 template::create_template(\%input, $kind, $desc_r, $heightdata_r, $opt_o, $PATH);
|
|
221 }
|
|
222
|
|
223
|
|
224 ################################################################################
|
|
225 ##### FUNCTINOS FOR INIT #####
|
|
226 ################################################################################
|
|
227
|
|
228 # all ints
|
|
229 sub isInt {
|
|
230 return ($_[0] =~ /^[\+\-]?\d+$/) ? 1 : 0;
|
|
231 }
|
|
232
|
|
233 # all reals
|
|
234 sub isReal {
|
|
235 return ($_[0] =~ /^[\+\-]?\d*.\d*?$/) ? 1 : 0;
|
|
236 }
|
|
237
|
|
238 sub isZeroOrOne {
|
|
239 return ($_[0] == 0 || $_[0] == 1) ? 1 : 0;
|
|
240 }
|
|
241
|
|
242 sub init {
|
|
243
|
|
244 # if (not defined $PATH) {
|
|
245 # die ("PATH must be defined\n");
|
|
246 # } elsif (not -e $PATH) {
|
|
247 # die ("PATH ($PATH) must exist\n");
|
|
248 # } elsif (not -d $PATH) {
|
|
249 # die ("PATH ($PATH) must be a directory\n");
|
|
250 # }
|
|
251
|
|
252 &getopts('T:B:C:d:E:f:F:h:k:l:m:o:s:t:w:x:y:abcenMOpSXY');
|
|
253
|
|
254 if (defined $opt_B &&
|
|
255 (!isReal($opt_B) || $opt_B < 0) ) {
|
|
256 printf("\noption B must be a positive real, but is $opt_B, $!\n");
|
|
257 die &usage();
|
|
258 }
|
|
259 if (defined $opt_d &&
|
|
260 ( !isReal($opt_d) || $opt_d < 0 || $opt_d > 1) ) {
|
|
261 print("\noption d must be a real between 0 and 1, but is $opt_d, $!\n");
|
|
262 die &usage();
|
|
263 }
|
|
264 if (defined $opt_E &&
|
|
265 (!isReal($opt_E) || $opt_E < 0 || $opt_E > 1) ) {
|
|
266 print("\noption E must be a real between 0 and 1, but is $opt_E, $!\n");
|
|
267 die &usage();
|
|
268 }
|
|
269 if (defined $opt_f) {
|
|
270 open (INPUTFILE, "$opt_f") or die "Couldn't open input filename $opt_f: $!\n";
|
|
271 } else {
|
|
272 print("\ninput file not specified, terminating...\n");
|
|
273 die &usage();
|
|
274 }
|
|
275 if (defined $opt_h &&
|
|
276 (!isReal($opt_h) || $opt_h < 0) ) {
|
|
277 print("\noption h must be a positive real, but is $opt_h, $!\n");
|
|
278 die &usage();
|
|
279 }
|
|
280 if (defined $opt_w &&
|
|
281 (!isReal($opt_w) || $opt_w < 0) ) {
|
|
282 print("\noption w must be a positive real, but is $opt_w, $!\n");
|
|
283 die &usage();
|
|
284 }
|
|
285 if (defined $opt_k &&
|
|
286 (!isZeroOrOne($opt_k)) ) {
|
|
287 print("\noption k must be 0 or 1, but is $opt_k, $!\n");
|
|
288 die &usage();
|
|
289 }
|
|
290
|
|
291 #toggles
|
|
292 if (!defined $opt_a) {
|
|
293 $opt_a = 0;
|
|
294 }
|
|
295 if (!defined $opt_b) {
|
|
296 $opt_b = 0;
|
|
297 }
|
|
298 if (!defined $opt_c) {
|
|
299 $opt_c = 0;
|
|
300 }
|
|
301 if (!defined $opt_e) {
|
|
302 $opt_e = 0;
|
|
303 }
|
|
304 if (!defined $opt_n) {
|
|
305 $opt_n = 0;
|
|
306 }
|
|
307 if (!defined $opt_M) {
|
|
308 $opt_M = 0;
|
|
309 }
|
|
310 if (!defined $opt_O) {
|
|
311 $opt_O = 0;
|
|
312 }
|
|
313 if (!defined $opt_p) {
|
|
314 $opt_p = 0;
|
|
315 }
|
|
316 if (!defined $opt_S) {
|
|
317 $opt_S = 0;
|
|
318 }
|
|
319 if (!defined $opt_X) {
|
|
320 $opt_X = 0;
|
|
321 };
|
|
322 if (!defined $opt_Y) {
|
|
323 $opt_Y = 0;
|
|
324 };
|
|
325
|
|
326 if (!defined $opt_F) {
|
|
327 $opt_F = "EPS"; # default to EPS
|
|
328 }
|
|
329 if (!defined $opt_o) {
|
|
330 $opt_o = "-"; # for standard out
|
|
331 } else {
|
|
332 # $opt_o =~ s/\.\S*$//; # remove extension if there is one
|
|
333 $opt_o .= "." . (lc $opt_F); # make file name
|
|
334 }
|
|
335
|
|
336 if (defined $opt_C &&
|
|
337 (!isInt($opt_C) || $opt_C < 0) ) {
|
|
338 printf("\noption C must be a postive integer, but is $opt_C, $!\n");
|
|
339 die &usage();
|
|
340 }
|
|
341
|
|
342 if (defined $opt_l && !isInt($opt_l)) {
|
|
343 printf("\noption l must be an integer, but is $opt_l, $!\n");
|
|
344 die &usage();
|
|
345 }
|
|
346
|
|
347 if (defined $opt_m && !isInt($opt_m)) {
|
|
348 printf("\noption m must be an integer, but is $opt_m, $!\n");
|
|
349 die &usage();
|
|
350 }
|
|
351
|
|
352 if (defined $opt_s && !isInt($opt_s)) {
|
|
353 printf("\noption s must be an integer, but is $opt_s, $!\n");
|
|
354 die &usage();
|
|
355 }
|
|
356 }
|