comparison seqlogo @ 2:2f4298673519 draft

Uploaded
author davidvanzessen
date Wed, 10 Sep 2014 10:33:29 -0400
parents
children
comparison
equal deleted inserted replaced
1:856b5b718d21 2:2f4298673519
1 #!/usr/bin/perl -w
2
3 =head1 NAME
4
5 seqlogo - runs the logo creation script
6
7 =head1 SYNOPSIS
8
9 seqlogo [OPTION]...-f [FILENAME]
10
11 =head1 DESCRIPTION
12
13 Creates a logo for the given input filename.
14
15 Available options:
16 -B <bar bits> Number of bits in bar (real between 0, 1)
17 -T <tic bits> Number of bits between tic marks
18 -C <chars per line> Number of characters per line of logo
19 -d <box shrink factor> Shrink factor of characters if option c is toggled
20 -E <error bar fraction> Fraction of error bar to show (real between 0, 1 )
21 -f <input file> Input filename
22 -F <format> Format of output (EPS, GIF, PDF, PNG), - for STDOUT
23 -h <logo height> Height of output logo (real > 0)
24 -k <kind of data> 0 for amino acid, 1 for nucleic acid ; if not
25 defined, a 90% certainty method is used to
26 determine whether the input data is amino acid or
27 nucleic acid
28 -l <sequence lower bound> Lower bound of sequence (integer)
29 -m <sequence upper bound> Upper bound of sequence (integer)
30 -o <output file> Name of output file
31 -s <sequence start> Sequence start number, defaults to 1 (int)
32 -t <titletext> Text of title, enclosed in "" if more than one word
33 -w <logo width> Width of output logo
34 -x <x-axis label> Label for x-axis
35 -y <y-axis label> Label for y-axis
36
37 Available toggles (no values associated)
38 -a Toggle antialiasing
39 -b Toggle bar ends
40 -c Toggle color
41 -e Toggle error bar
42 -M Toggle small sample correction
43 -O Toggle outlining of characters
44 -n Toggle numbering of x-axis
45 -S Toggle stretching of logos to entire length
46 -X Toggle boxing of characters
47 -Y Toggle y-axis
48
49 =head1 EXAMPLE
50
51 The following command takes as input "input.fasta" and returns the logo in the
52 form "logo.eps". Antialiasing, bar ends, color, small sample correction,
53 numbering of x-axis, and y-axis labelling are turned on:
54
55 seqlogo -f input.fasta -F EPS -o logo.eps -abcMnY
56
57 =cut
58
59 use vars qw($PATH);
60
61 BEGIN {
62
63 use FindBin qw($Bin);
64 use lib "$Bin";
65
66 $PATH = $Bin;
67
68 ## $PATH = "/h/gary/Seqlogo/Code/";
69 # $PATH = "/n/weblogo/home/httpd/weblogo/pub/beta/Seqlogo/Code";
70 # unshift(@INC, $PATH);
71 }
72
73 use logo;
74 use template;
75 use Getopt::Std;
76 use FileHandle;
77
78
79 my $opts;
80 $opts =
81 $opt_a || # antialiasing
82 $opt_b || # bar ends (0 or 1)
83 $opt_c || # color
84 $opt_e || # show error bar (0 or 1)
85 $opt_n || # numbering (0 or 1)
86 $opt_M || # small sample correction (0 or 1)
87 $opt_O || # outline (0 or 1)
88 $opt_S || # stretch
89 $opt_X || # box (0 for no box, 1 for box)
90 $opt_Y || # y axis
91
92 $opt_B || # bar bits (real)
93 $opt_T || # tics bits (real)
94 $opt_C || # chars per line
95 $opt_d || # box shrinking factor (<1)
96 $opt_E || # error bar fraction (real)
97 $opt_f || # input filename
98 $opt_F || # format (PNG, EPS, PDF, GIF)
99 $opt_h || # logo height (cm)
100 $opt_k || # 0 = amino acid, 1 = nucleic acid
101 $opt_l || # lower bound of sequence to put in logo
102 $opt_m || # max bound of sequence to put in logo
103 $opt_o || # output file
104 $opt_s || # start number for very beginning of sequence
105 $opt_t || # title text (string)
106 $opt_w || # logo width (cm)
107 $opt_x || # x axis label
108 $opt_y || # y axis label
109 $opts;
110
111 ################################################################################
112 ##### USAGE #####
113 ################################################################################
114
115 sub usage {
116 my $usage = <<END
117
118 usage: seqlogo -f <input filename> [OPTIONs with values]
119 Creates a logo for the given input filename.
120
121 Available options:
122 -B <bar bits> Number of bits in bar (real # > 0)
123 -T <tic bits> Number of bits between tic marks
124 -C <chars per line> Number of characters per line of logo
125 -d <box shrink factor> Shrink factor of characters if option c is toggled
126 -E <error bar fraction> Fraction of error bar to show (real # > 0)
127 -f <input filename> Input filename
128 -F <format> Format of output (EPS, GIF, PDF, PNG), - for STDOUT
129 -h <logo height> Height of output logo (real # > 0)
130 -k <kind of data> 0 for amino acid, 1 for nucleic acid
131 -l <sequence lower bound> Lower bound of sequence (integer)
132 -m <sequence upper bound> Upper bound of sequence (integer)
133 -o <output file> Name of output file
134 -s <sequence start> Sequence start number, defaults to 1 (int)
135 -t <titletext> Text of title, enclosed in "" if more than one word
136 -w <logo width> Width of output logo
137 -x <x-axis label> Label for x-axis
138 -y <y-axis label> Label for y-axis
139
140 Available toggles (no values associated) bOenc
141 -a Toggle antialiasing
142 -b Toggle bar ends
143 -c Toggle color
144 -e Toggle error bar
145 -M Toggle small sample correction
146 -O Toggle outlining of characters
147 -p Toggle fineprint
148 -n Toggle numbering of x-axis
149 -S Toggle stretching of logos to entire length
150 -X Toggle boxing of characters
151 -Y Toggle y-axis
152
153 END
154 ;
155
156 return $usage;
157 }
158
159 ################################################################################
160 ##### MAIN FUNCTION #####
161 ################################################################################
162
163 # arguments : $_[0] : file name
164 MAIN: {
165 init();
166
167 # feed data from file to make height data array reference
168 my @input = <INPUTFILE>;
169 close (INPUTFILE);
170 my %heightparams = (
171 smallsampletoggle => $opt_M,
172 input_kind => $opt_k,
173 stretch => $opt_S
174 );
175
176 my ($heightdata_r, $desc_r, $kind, $goodlength, $badline, $validformat) =
177 logo::getHeightData(\@input, \%heightparams);
178
179 # check for errors
180 if ((defined $validformat) && ($validformat == 1)) {
181 die("Error: Invalid input format does not conform to FASTA, " .
182 "CLUSTAL, or Flat.\n");
183 }
184 if (!$goodlength) {
185 die("Error: Number of characters in each logo line is not " .
186 "consistent, starting at: ", $badline, "\n");
187 }
188
189 my %input = (
190 LOGO_HEIGHT => $opt_h,
191 LOGO_WIDTH => $opt_w,
192 COLORSCHEME => ($opt_c) ? "DEFAULT" : "BW",
193
194 LOGOSTART => $opt_l,
195 LOGOEND => $opt_m,
196 START_NUM => $opt_s,
197
198 TITLETEXT => $opt_t,
199 YAXIS_LABEL => $opt_y,
200 XAXIS_LABEL => $opt_x,
201
202 BOXSHRINK => $opt_d,
203 CHARSPERLINE => $opt_C,
204 BARBITS => $opt_B,
205 TICBITS => $opt_T,
206 RES => "96",
207 "FORMAT" => (uc $opt_F),
208
209 # toggles
210 ANTIALIAS => $opt_a,
211 ERRBAR => $opt_e,
212 FINEPRINT => $opt_p,
213 NUMBERING => $opt_n,
214 OUTLINE => $opt_O,
215 SHOWENDS => $opt_b,
216 SHOWINGBOX => $opt_X,
217 YAXIS => $opt_Y
218 );
219
220 template::create_template(\%input, $kind, $desc_r, $heightdata_r, $opt_o, $PATH);
221 }
222
223
224 ################################################################################
225 ##### FUNCTINOS FOR INIT #####
226 ################################################################################
227
228 # all ints
229 sub isInt {
230 return ($_[0] =~ /^[\+\-]?\d+$/) ? 1 : 0;
231 }
232
233 # all reals
234 sub isReal {
235 return ($_[0] =~ /^[\+\-]?\d*.\d*?$/) ? 1 : 0;
236 }
237
238 sub isZeroOrOne {
239 return ($_[0] == 0 || $_[0] == 1) ? 1 : 0;
240 }
241
242 sub init {
243
244 # if (not defined $PATH) {
245 # die ("PATH must be defined\n");
246 # } elsif (not -e $PATH) {
247 # die ("PATH ($PATH) must exist\n");
248 # } elsif (not -d $PATH) {
249 # die ("PATH ($PATH) must be a directory\n");
250 # }
251
252 &getopts('T:B:C:d:E:f:F:h:k:l:m:o:s:t:w:x:y:abcenMOpSXY');
253
254 if (defined $opt_B &&
255 (!isReal($opt_B) || $opt_B < 0) ) {
256 printf("\noption B must be a positive real, but is $opt_B, $!\n");
257 die &usage();
258 }
259 if (defined $opt_d &&
260 ( !isReal($opt_d) || $opt_d < 0 || $opt_d > 1) ) {
261 print("\noption d must be a real between 0 and 1, but is $opt_d, $!\n");
262 die &usage();
263 }
264 if (defined $opt_E &&
265 (!isReal($opt_E) || $opt_E < 0 || $opt_E > 1) ) {
266 print("\noption E must be a real between 0 and 1, but is $opt_E, $!\n");
267 die &usage();
268 }
269 if (defined $opt_f) {
270 open (INPUTFILE, "$opt_f") or die "Couldn't open input filename $opt_f: $!\n";
271 } else {
272 print("\ninput file not specified, terminating...\n");
273 die &usage();
274 }
275 if (defined $opt_h &&
276 (!isReal($opt_h) || $opt_h < 0) ) {
277 print("\noption h must be a positive real, but is $opt_h, $!\n");
278 die &usage();
279 }
280 if (defined $opt_w &&
281 (!isReal($opt_w) || $opt_w < 0) ) {
282 print("\noption w must be a positive real, but is $opt_w, $!\n");
283 die &usage();
284 }
285 if (defined $opt_k &&
286 (!isZeroOrOne($opt_k)) ) {
287 print("\noption k must be 0 or 1, but is $opt_k, $!\n");
288 die &usage();
289 }
290
291 #toggles
292 if (!defined $opt_a) {
293 $opt_a = 0;
294 }
295 if (!defined $opt_b) {
296 $opt_b = 0;
297 }
298 if (!defined $opt_c) {
299 $opt_c = 0;
300 }
301 if (!defined $opt_e) {
302 $opt_e = 0;
303 }
304 if (!defined $opt_n) {
305 $opt_n = 0;
306 }
307 if (!defined $opt_M) {
308 $opt_M = 0;
309 }
310 if (!defined $opt_O) {
311 $opt_O = 0;
312 }
313 if (!defined $opt_p) {
314 $opt_p = 0;
315 }
316 if (!defined $opt_S) {
317 $opt_S = 0;
318 }
319 if (!defined $opt_X) {
320 $opt_X = 0;
321 };
322 if (!defined $opt_Y) {
323 $opt_Y = 0;
324 };
325
326 if (!defined $opt_F) {
327 $opt_F = "EPS"; # default to EPS
328 }
329 if (!defined $opt_o) {
330 $opt_o = "-"; # for standard out
331 } else {
332 # $opt_o =~ s/\.\S*$//; # remove extension if there is one
333 $opt_o .= "." . (lc $opt_F); # make file name
334 }
335
336 if (defined $opt_C &&
337 (!isInt($opt_C) || $opt_C < 0) ) {
338 printf("\noption C must be a postive integer, but is $opt_C, $!\n");
339 die &usage();
340 }
341
342 if (defined $opt_l && !isInt($opt_l)) {
343 printf("\noption l must be an integer, but is $opt_l, $!\n");
344 die &usage();
345 }
346
347 if (defined $opt_m && !isInt($opt_m)) {
348 printf("\noption m must be an integer, but is $opt_m, $!\n");
349 die &usage();
350 }
351
352 if (defined $opt_s && !isInt($opt_s)) {
353 printf("\noption s must be an integer, but is $opt_s, $!\n");
354 die &usage();
355 }
356 }