Mercurial > repos > davidvanzessen > mutation_analysis
view seqlogo @ 3:a0b27058dcac draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 17 Sep 2014 07:25:17 -0400 |
parents | 2f4298673519 |
children |
line wrap: on
line source
#!/usr/bin/perl -w

=head1 NAME

seqlogo - runs the logo creation script

=head1 SYNOPSIS

    seqlogo [OPTION]...-f [FILENAME]

=head1 DESCRIPTION

Creates a logo for the given input filename.

Available options:

    -B <bar bits>              Number of bits in bar (real between 0, 1)
    -T <tic bits>              Number of bits between tic marks
    -C <chars per line>        Number of characters per line of logo
    -d <box shrink factor>     Shrink factor of characters if option c is toggled
    -E <error bar fraction>    Fraction of error bar to show (real between 0, 1 )
    -f <input file>            Input filename
    -F <format>                Format of output (EPS, GIF, PDF, PNG), - for STDOUT
    -h <logo height>           Height of output logo (real > 0)
    -k <kind of data>          0 for amino acid, 1 for nucleic acid ; if not
                               defined, a 90% certainty method is used to
                               determine whether the input data is amino acid or
                               nucleic acid
    -l <sequence lower bound>  Lower bound of sequence (integer)
    -m <sequence upper bound>  Upper bound of sequence (integer)
    -o <output file>           Name of output file
    -s <sequence start>        Sequence start number, defaults to 1 (int)
    -t <titletext>             Text of title, enclosed in "" if more than one word
    -w <logo width>            Width of output logo
    -x <x-axis label>          Label for x-axis
    -y <y-axis label>          Label for y-axis

Available toggles (no values associated)

    -a       Toggle antialiasing
    -b       Toggle bar ends
    -c       Toggle color
    -e       Toggle error bar
    -M       Toggle small sample correction
    -O       Toggle outlining of characters
    -p       Toggle fineprint
    -n       Toggle numbering of x-axis
    -S       Toggle stretching of logos to entire length
    -X       Toggle boxing of characters
    -Y       Toggle y-axis

=head1 EXAMPLE

The following command takes as input "input.fasta" and returns the logo in the
form "logo.eps". Antialiasing, bar ends, color, small sample correction,
numbering of x-axis, and y-axis labelling are turned on:

    seqlogo -f input.fasta -F EPS -o logo.eps -abcMnY

=cut

use strict;
use warnings;

# $PATH is the directory this script lives in; it is passed on to
# template::create_template().
our $PATH;

# Getopt::Std::getopts() stores each parsed option in the package variable
# $opt_<letter>.  Declaring them here keeps the script strict-clean and
# replaces the old "$opts = $opt_a || ..." chain whose only purpose was to
# silence "used only once" warnings.
our (
    # toggles
    $opt_a,    # antialiasing
    $opt_b,    # bar ends
    $opt_c,    # color
    $opt_e,    # show error bar
    $opt_n,    # numbering of x-axis
    $opt_M,    # small sample correction
    $opt_O,    # outline
    $opt_p,    # fineprint
    $opt_S,    # stretch
    $opt_X,    # box
    $opt_Y,    # y axis

    # options with values
    $opt_B,    # bar bits (real)
    $opt_T,    # tic bits (real)
    $opt_C,    # chars per line
    $opt_d,    # box shrinking factor (real between 0 and 1)
    $opt_E,    # error bar fraction (real between 0 and 1)
    $opt_f,    # input filename
    $opt_F,    # format (PNG, EPS, PDF, GIF)
    $opt_h,    # logo height
    $opt_k,    # 0 = amino acid, 1 = nucleic acid
    $opt_l,    # lower bound of sequence to put in logo
    $opt_m,    # upper bound of sequence to put in logo
    $opt_o,    # output file
    $opt_s,    # start number for very beginning of sequence
    $opt_t,    # title text
    $opt_w,    # logo width
    $opt_x,    # x axis label
    $opt_y,    # y axis label
);

BEGIN {
    # Make the sibling modules (logo, template) loadable from the script's
    # own directory, wherever it is invoked from.
    use FindBin qw($Bin);
    use lib "$Bin";
    $PATH = $Bin;
}

use logo;
use template;
use Getopt::Std;
use FileHandle;

################################################################################
#####                               USAGE                                  #####
################################################################################

# Returns the command-line help text as a single string.
sub usage {
    my $usage = <<'END';

usage: seqlogo -f <input filename> [OPTIONs with values]

Creates a logo for the given input filename.

Available options:
  -B <bar bits>              Number of bits in bar (real # > 0)
  -T <tic bits>              Number of bits between tic marks
  -C <chars per line>        Number of characters per line of logo
  -d <box shrink factor>     Shrink factor of characters if option c is toggled
  -E <error bar fraction>    Fraction of error bar to show (real # > 0)
  -f <input filename>        Input filename
  -F <format>                Format of output (EPS, GIF, PDF, PNG), - for STDOUT
  -h <logo height>           Height of output logo (real # > 0)
  -k <kind of data>          0 for amino acid, 1 for nucleic acid
  -l <sequence lower bound>  Lower bound of sequence (integer)
  -m <sequence upper bound>  Upper bound of sequence (integer)
  -o <output file>           Name of output file
  -s <sequence start>        Sequence start number, defaults to 1 (int)
  -t <titletext>             Text of title, enclosed in "" if more than one word
  -w <logo width>            Width of output logo
  -x <x-axis label>          Label for x-axis
  -y <y-axis label>          Label for y-axis

Available toggles (no values associated) bOenc
  -a       Toggle antialiasing
  -b       Toggle bar ends
  -c       Toggle color
  -e       Toggle error bar
  -M       Toggle small sample correction
  -O       Toggle outlining of characters
  -p       Toggle fineprint
  -n       Toggle numbering of x-axis
  -S       Toggle stretching of logos to entire length
  -X       Toggle boxing of characters
  -Y       Toggle y-axis

END

    return $usage;
}

# Aborts with a one-line explanation followed by the usage text.  Everything
# goes to STDERR so that a logo being written to STDOUT is never polluted by
# diagnostics.
sub usage_error {
    my ($message) = @_;
    die "\n$message\n" . usage();
}

################################################################################
#####                           MAIN FUNCTION                              #####
################################################################################

MAIN: {
    # Parses and validates the options and opens INPUTFILE.
    init();

    # feed data from file to make height data array reference
    my @input = <INPUTFILE>;
    close(INPUTFILE);

    my %heightparams = (
        smallsampletoggle => $opt_M,
        input_kind        => $opt_k,
        stretch           => $opt_S,
    );

    my ($heightdata_r, $desc_r, $kind, $goodlength, $badline, $validformat) =
        logo::getHeightData(\@input, \%heightparams);

    # check for errors
    if ((defined $validformat) && ($validformat == 1)) {
        die("Error: Invalid input format does not conform to FASTA, "
          . "CLUSTAL, or Flat.\n");
    }

    if (!$goodlength) {
        die("Error: Number of characters in each logo line is not "
          . "consistent, starting at: ", $badline, "\n");
    }

    # NOTE(review): -E (error bar fraction) is validated in init() but is not
    # forwarded in this hash -- confirm whether template expects it.
    my %input = (
        LOGO_HEIGHT  => $opt_h,
        LOGO_WIDTH   => $opt_w,
        COLORSCHEME  => ($opt_c) ? "DEFAULT" : "BW",

        LOGOSTART    => $opt_l,
        LOGOEND      => $opt_m,
        START_NUM    => $opt_s,

        TITLETEXT    => $opt_t,
        YAXIS_LABEL  => $opt_y,
        XAXIS_LABEL  => $opt_x,

        BOXSHRINK    => $opt_d,
        CHARSPERLINE => $opt_C,
        BARBITS      => $opt_B,
        TICBITS      => $opt_T,
        RES          => "96",
        "FORMAT"     => (uc $opt_F),

        # toggles
        ANTIALIAS    => $opt_a,
        ERRBAR       => $opt_e,
        FINEPRINT    => $opt_p,
        NUMBERING    => $opt_n,
        OUTLINE      => $opt_O,
        SHOWENDS     => $opt_b,
        SHOWINGBOX   => $opt_X,
        YAXIS        => $opt_Y,
    );

    template::create_template(\%input, $kind, $desc_r, $heightdata_r, $opt_o,
        $PATH);
}

################################################################################
#####                         FUNCTIONS FOR INIT                           #####
################################################################################

# Returns 1 if the argument is an optionally signed string of digits, else 0.
sub isInt {
    return (defined $_[0] && $_[0] =~ /^[+-]?\d+$/) ? 1 : 0;
}

# Returns 1 if the argument is a decimal number (optional sign, optional
# fraction, optional exponent), else 0.  The previous pattern used an
# unescaped "." and therefore accepted strings such as "a", "+" or "1x2".
sub isReal {
    return (defined $_[0]
          && $_[0] =~ /^[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?$/) ? 1 : 0;
}

# Returns 1 if the argument is a number equal to 0 or 1, else 0.  The numeric
# comparison is guarded by isReal() because any non-numeric string compares
# equal to 0 under "==".
sub isZeroOrOne {
    return (isReal($_[0]) && ($_[0] == 0 || $_[0] == 1)) ? 1 : 0;
}

# Parses the command line into the $opt_* globals, validates the values,
# fills in defaults and opens the input as the INPUTFILE handle.  Dies with
# the usage text when an option is invalid or no input file is given.
sub init {
    getopts('T:B:C:d:E:f:F:h:k:l:m:o:s:t:w:x:y:abcenMOpSXY');

    if (defined $opt_B && (!isReal($opt_B) || $opt_B < 0)) {
        usage_error("option B must be a positive real, but is $opt_B");
    }

    if (defined $opt_d && (!isReal($opt_d) || $opt_d < 0 || $opt_d > 1)) {
        usage_error("option d must be a real between 0 and 1, but is $opt_d");
    }

    if (defined $opt_E && (!isReal($opt_E) || $opt_E < 0 || $opt_E > 1)) {
        usage_error("option E must be a real between 0 and 1, but is $opt_E");
    }

    if (!defined $opt_f) {
        usage_error("input file not specified, terminating...");
    }

    if ($opt_f eq '-') {
        # The former two-argument open() read STDIN for "-"; keep doing so.
        open(INPUTFILE, '<&', \*STDIN)
            or die "Couldn't open input filename $opt_f: $!\n";
    }
    else {
        # Three-argument open: the file name can no longer smuggle in an
        # open mode or a pipe command.
        open(INPUTFILE, '<', $opt_f)
            or die "Couldn't open input filename $opt_f: $!\n";
    }

    if (defined $opt_h && (!isReal($opt_h) || $opt_h < 0)) {
        usage_error("option h must be a positive real, but is $opt_h");
    }

    if (defined $opt_w && (!isReal($opt_w) || $opt_w < 0)) {
        usage_error("option w must be a positive real, but is $opt_w");
    }

    if (defined $opt_k && !isZeroOrOne($opt_k)) {
        usage_error("option k must be 0 or 1, but is $opt_k");
    }

    # toggles default to off
    $opt_a = 0 unless defined $opt_a;
    $opt_b = 0 unless defined $opt_b;
    $opt_c = 0 unless defined $opt_c;
    $opt_e = 0 unless defined $opt_e;
    $opt_n = 0 unless defined $opt_n;
    $opt_M = 0 unless defined $opt_M;
    $opt_O = 0 unless defined $opt_O;
    $opt_p = 0 unless defined $opt_p;
    $opt_S = 0 unless defined $opt_S;
    $opt_X = 0 unless defined $opt_X;
    $opt_Y = 0 unless defined $opt_Y;

    # default to EPS
    $opt_F = "EPS" unless defined $opt_F;

    if (!defined $opt_o) {
        $opt_o = "-";    # for standard out
    }
    else {
        # the lower-cased format is always appended as the file extension
        $opt_o .= "." . (lc $opt_F);
    }

    if (defined $opt_C && (!isInt($opt_C) || $opt_C < 0)) {
        usage_error("option C must be a positive integer, but is $opt_C");
    }

    if (defined $opt_l && !isInt($opt_l)) {
        usage_error("option l must be an integer, but is $opt_l");
    }

    if (defined $opt_m && !isInt($opt_m)) {
        usage_error("option m must be an integer, but is $opt_m");
    }

    if (defined $opt_s && !isInt($opt_s)) {
        usage_error("option s must be an integer, but is $opt_s");
    }
}