diff seqlogo @ 2:2f4298673519 draft

Uploaded
author davidvanzessen
date Wed, 10 Sep 2014 10:33:29 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/seqlogo	Wed Sep 10 10:33:29 2014 -0400
@@ -0,0 +1,356 @@
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+  seqlogo - runs the logo creation script 
+
+=head1 SYNOPSIS
+
+  seqlogo [OPTION]...-f [FILENAME]
+
+=head1 DESCRIPTION
+
+    Creates a logo for the given input filename.
+
+    Available options:
+    -B <bar bits>              Number of bits in bar (real # > 0)
+    -T <tic bits>              Number of bits between tic marks
+    -C <chars per line>        Number of characters per line of logo
+    -d <box shrink factor>     Shrink factor of characters if option c is toggled
+    -E <error bar fraction>    Fraction of error bar to show (real between 0, 1 )
+    -f <input file>            Input filename
+    -F <format>                Format of output (EPS, GIF, PDF, PNG), - for STDOUT
+    -h <logo height>           Height of output logo (real > 0)
+    -k <kind of data>          0 for amino acid, 1 for nucleic acid ; if not
+                                 defined, a 90% certainty method is used to
+                                 determine whether the input data is amino acid or
+                                 nucleic acid
+    -l <sequence lower bound>  Lower bound of sequence (integer)
+    -m <sequence upper bound>  Upper bound of sequence (integer)
+    -o <output file>           Name of output file
+    -s <sequence start>        Sequence start number, defaults to 1 (int)
+    -t <titletext>             Text of title, enclosed in "" if more than one word
+    -w <logo width>            Width of output logo
+    -x <x-axis label>          Label for x-axis
+    -y <y-axis label>          Label for y-axis
+
+    Available toggles (no values associated)
+    -a       Toggle antialiasing
+    -b       Toggle bar ends
+    -c       Toggle color
+    -e       Toggle error bar
+    -M       Toggle small sample correction
+    -O       Toggle outlining of characters
+    -p       Toggle fineprint
+    -n       Toggle numbering of x-axis
+    -S       Toggle stretching of logos to entire length
+    -X       Toggle boxing of characters
+    -Y       Toggle y-axis
+
+=head1 EXAMPLE
+
+  The following command takes as input "input.fasta" and returns the logo in the
+  form "logo.eps". Antialiasing, bar ends, color, small sample correction,
+  numbering of x-axis, and y-axis labelling are turned on:
+
+    seqlogo -f input.fasta -F EPS -o logo.eps -abcMnY
+
+=cut
+
+use vars qw($PATH);
+
+# Compile-time setup.  FindBin supplies the directory this script was started
+# from; that directory is put on the module search path (the logo and template
+# modules are loaded right after this block) and is remembered in $PATH, which
+# MAIN later hands to template::create_template.
+BEGIN {
+    use FindBin qw($Bin);
+    use lib $Bin;
+
+    $PATH = $Bin;
+}
+
+use logo;
+use template;
+use Getopt::Std;
+use FileHandle;
+
+
+# Getopt::Std::getopts (called from init) stores every switch in a package
+# global named $opt_<letter>.  This chain mentions each of those globals once
+# more; it appears to exist so that "perl -w" does not report variables such
+# as $opt_T or $opt_x, which are otherwise referenced a single time, as
+# "used only once".  It runs before getopts, so every operand is still
+# undefined and $opts itself ends up undefined; the value is never read.
+#
+# The list must name every letter in the getopts specification in init.
+my $opts;
+$opts =
+  # toggles (no value)
+  $opt_a || # antialiasing
+  $opt_b || # bar ends (0 or 1)
+  $opt_c || # color
+  $opt_e || # show error bar (0 or 1)
+  $opt_n || # numbering (0 or 1)
+  $opt_M || # small sample correction (0 or 1)
+  $opt_O || # outline (0 or 1)
+  $opt_p || # fineprint (0 or 1)
+  $opt_S || # stretch
+  $opt_X || # box (0 for no box, 1 for box)
+  $opt_Y || # y axis
+
+  # options that take a value
+  $opt_B || # bar bits (real)
+  $opt_T || # tics bits (real)
+  $opt_C || # chars per line
+  $opt_d || # box shrinking factor (<1)
+  $opt_E || # error bar fraction (real)
+  $opt_f || # input filename
+  $opt_F || # format (PNG, EPS, PDF, GIF)
+  $opt_h || # logo height (cm)
+  $opt_k || # 0 = amino acid, 1 = nucleic acid
+  $opt_l || # lower bound of sequence to put in logo
+  $opt_m || # max bound of sequence to put in logo
+  $opt_o || # output file
+  $opt_s || # start number for very beginning of sequence
+  $opt_t || # title text (string)
+  $opt_w || # logo width (cm)
+  $opt_x || # x axis label
+  $opt_y || # y axis label
+$opts;
+
+################################################################################
+#####                              USAGE                                   #####
+################################################################################
+
+# Build the command-line help text.
+#
+# Takes no arguments.  Returns the whole help message as a single string
+# (leading and trailing blank line included); init passes it to die().
+# The text is a non-interpolating here-document, so it is emitted verbatim.
+sub usage {
+    return <<'END';
+
+usage: seqlogo -f <input filename> [OPTIONs with values]
+Creates a logo for the given input filename.
+
+Available options:
+  -B <bar bits>              Number of bits in bar (real # > 0)
+  -T <tic bits>              Number of bits between tic marks
+  -C <chars per line>        Number of characters per line of logo
+  -d <box shrink factor>     Shrink factor of characters if option c is toggled
+  -E <error bar fraction>    Fraction of error bar to show (real between 0 and 1)
+  -f <input filename>        Input filename
+  -F <format>                Format of output (EPS, GIF, PDF, PNG), - for STDOUT
+  -h <logo height>           Height of output logo (real # > 0)
+  -k <kind of data>          0 for amino acid, 1 for nucleic acid
+  -l <sequence lower bound>  Lower bound of sequence (integer)
+  -m <sequence upper bound>  Upper bound of sequence (integer)
+  -o <output file>           Name of output file
+  -s <sequence start>        Sequence start number, defaults to 1 (int)
+  -t <titletext>             Text of title, enclosed in "" if more than one word
+  -w <logo width>            Width of output logo
+  -x <x-axis label>          Label for x-axis
+  -y <y-axis label>          Label for y-axis
+
+Available toggles (no values associated)
+  -a       Toggle antialiasing
+  -b       Toggle bar ends
+  -c       Toggle color
+  -e       Toggle error bar
+  -M       Toggle small sample correction
+  -O       Toggle outlining of characters
+  -p       Toggle fineprint
+  -n       Toggle numbering of x-axis
+  -S       Toggle stretching of logos to entire length
+  -X       Toggle boxing of characters
+  -Y       Toggle y-axis
+
+END
+}
+
+################################################################################
+#####                         MAIN FUNCTION                                #####
+################################################################################
+
+# Script entry point.  This is a labelled bare block, not a subroutine, so it
+# receives no arguments; everything it needs comes from the $opt_* globals
+# that init() fills in and from the INPUTFILE handle that init() opens.
+#
+# Steps: validate the command line, read the input, let logo::getHeightData
+# analyse it, stop on a reported format or length problem, then pass the
+# rendering settings to template::create_template.
+MAIN: {
+    init();
+
+    # read the whole input, then release the handle init() opened
+    my @lines = <INPUTFILE>;
+    close INPUTFILE;
+
+    my %height_opts;
+    $height_opts{smallsampletoggle} = $opt_M;
+    $height_opts{input_kind}        = $opt_k;
+    $height_opts{stretch}           = $opt_S;
+
+    my ($heightdata_r, $desc_r, $kind, $goodlength, $badline, $validformat) =
+        logo::getHeightData(\@lines, \%height_opts);
+
+    # a defined $validformat equal to 1 is treated as "format not recognised"
+    if (defined($validformat) and $validformat == 1) {
+        die("Error: Invalid input format does not conform to FASTA, " .
+            "CLUSTAL, or Flat.\n");
+    }
+
+    unless ($goodlength) {
+        die("Error: Number of characters in each logo line is not " .
+            "consistent, starting at: ", $badline, "\n");
+    }
+
+    my $colorscheme = $opt_c ? "DEFAULT" : "BW";
+    my $format      = uc $opt_F;
+
+    # NOTE(review): $opt_E is validated in init() but is not forwarded here;
+    # confirm against template.pm whether that is intended.
+    my %settings = (
+        # geometry and colour
+        LOGO_HEIGHT  => $opt_h,
+        LOGO_WIDTH   => $opt_w,
+        COLORSCHEME  => $colorscheme,
+
+        # which part of the sequence to draw, and how it is numbered
+        LOGOSTART    => $opt_l,
+        LOGOEND      => $opt_m,
+        START_NUM    => $opt_s,
+
+        # text labels
+        TITLETEXT    => $opt_t,
+        YAXIS_LABEL  => $opt_y,
+        XAXIS_LABEL  => $opt_x,
+
+        # layout values and output format
+        BOXSHRINK    => $opt_d,
+        CHARSPERLINE => $opt_C,
+        BARBITS      => $opt_B,
+        TICBITS      => $opt_T,
+        RES          => "96",
+        "FORMAT"     => $format,
+
+        # toggles
+        ANTIALIAS    => $opt_a,
+        ERRBAR       => $opt_e,
+        FINEPRINT    => $opt_p,
+        NUMBERING    => $opt_n,
+        OUTLINE      => $opt_O,
+        SHOWENDS     => $opt_b,
+        SHOWINGBOX   => $opt_X,
+        YAXIS        => $opt_Y,
+    );
+
+    template::create_template(\%settings, $kind, $desc_r, $heightdata_r,
+                              $opt_o, $PATH);
+}
+
+
+################################################################################
+#####                      FUNCTIONS FOR INIT                              #####
+################################################################################
+
+# Report whether the argument is written as a whole number.
+#
+#   $_[0] : string to test (callers pass a defined option value)
+#
+# Returns 1 when the string is an optional sign followed by one or more
+# ASCII digits, 0 otherwise.  \A and \z anchor the entire string, so a value
+# with a trailing newline is rejected rather than slipping through.
+sub isInt {
+    my ($candidate) = @_;
+    return ($candidate =~ /\A[+-]?[0-9]+\z/) ? 1 : 0;
+}
+
+# Report whether the argument is written as a decimal number.
+#
+#   $_[0] : string to test (callers pass a defined option value)
+#
+# Returns 1 for an optional sign, then digits with an optional fractional
+# part ("5", "5.", "0.25") or a bare fraction (".5"), optionally followed by
+# an exponent ("1e5"); returns 0 for anything else.  The decimal point is
+# escaped and at least one digit is required, so single letters, "5x", "-"
+# and "." are no longer mistaken for numbers.
+sub isReal {
+    my ($candidate) = @_;
+    return ($candidate =~
+        /\A[+-]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][+-]?[0-9]+)?\z/) ? 1 : 0;
+}
+
+# Report whether the argument is exactly "0" or "1".
+#
+#   $_[0] : string to test (callers pass a defined option value)
+#
+# Returns 1 for the single character 0 or 1, and 0 otherwise.  The check is
+# textual on purpose: a numeric comparison would coerce any non-numeric
+# string to 0 and accept it.
+sub isZeroOrOne {
+    my ($candidate) = @_;
+    return ($candidate =~ /\A[01]\z/) ? 1 : 0;
+}
+
+# Print a diagnostic for a bad command line and stop with the usage text.
+#
+#   $_[0] : the complaint, without surrounding newlines
+#
+# The complaint goes to STDOUT framed by newlines; the usage text is raised
+# through die().  Using print rather than printf keeps user-supplied option
+# values from being interpreted as a format string.
+sub _reject {
+    my ($complaint) = @_;
+    print "\n$complaint\n";
+    die usage();
+}
+
+# Parse and validate the command line.
+#
+# Takes no arguments and returns nothing useful.  Side effects:
+#   * fills the $opt_* package globals through Getopt::Std::getopts (its
+#     return value is not checked here);
+#   * opens the input on the global INPUTFILE handle, which MAIN reads;
+#   * sets every toggle that was not given to 0;
+#   * defaults $opt_F to "EPS" and $opt_o to "-", and otherwise appends
+#     "." plus the lower-cased format to $opt_o (an extension already present
+#     in the name is not removed).
+# Dies with the usage text on an invalid value or a missing -f, and with a
+# plain message when the input cannot be opened.
+sub init {
+    getopts('T:B:C:d:E:f:F:h:k:l:m:o:s:t:w:x:y:abcenMOpSXY');
+
+    if (defined $opt_B && (!isReal($opt_B) || $opt_B < 0)) {
+        _reject("option B must be a positive real, but is $opt_B");
+    }
+    if (defined $opt_d && (!isReal($opt_d) || $opt_d < 0 || $opt_d > 1)) {
+        _reject("option d must be a real between 0 and 1, but is $opt_d");
+    }
+    if (defined $opt_E && (!isReal($opt_E) || $opt_E < 0 || $opt_E > 1)) {
+        _reject("option E must be a real between 0 and 1, but is $opt_E");
+    }
+
+    if (!defined $opt_f) {
+        _reject("input file not specified, terminating...");
+    }
+    if ($opt_f eq '-') {
+        # "-" has always meant standard input; keep that explicitly
+        open(INPUTFILE, '<&', \*STDIN)
+            or die "Couldn't open input filename $opt_f: $!\n";
+    }
+    else {
+        # three-argument open: the name is never parsed for a mode or a pipe
+        open(INPUTFILE, '<', $opt_f)
+            or die "Couldn't open input filename $opt_f: $!\n";
+    }
+
+    if (defined $opt_h && (!isReal($opt_h) || $opt_h < 0)) {
+        _reject("option h must be a positive real, but is $opt_h");
+    }
+    if (defined $opt_w && (!isReal($opt_w) || $opt_w < 0)) {
+        _reject("option w must be a positive real, but is $opt_w");
+    }
+    if (defined $opt_k && !isZeroOrOne($opt_k)) {
+        _reject("option k must be 0 or 1, but is $opt_k");
+    }
+
+    # toggles: foreach aliases each global, so the assignment updates it
+    foreach my $toggle ($opt_a, $opt_b, $opt_c, $opt_e, $opt_n, $opt_M,
+                        $opt_O, $opt_p, $opt_S, $opt_X, $opt_Y) {
+        $toggle = 0 unless defined $toggle;
+    }
+
+    if (!defined $opt_F) {
+        $opt_F = "EPS";    # default to EPS
+    }
+    if (!defined $opt_o) {
+        $opt_o = "-";      # for standard out
+    }
+    else {
+        $opt_o .= "." . (lc $opt_F);    # make file name
+    }
+
+    if (defined $opt_C && (!isInt($opt_C) || $opt_C < 0)) {
+        _reject("option C must be a positive integer, but is $opt_C");
+    }
+    if (defined $opt_l && !isInt($opt_l)) {
+        _reject("option l must be an integer, but is $opt_l");
+    }
+    if (defined $opt_m && !isInt($opt_m)) {
+        _reject("option m must be an integer, but is $opt_m");
+    }
+    if (defined $opt_s && !isInt($opt_s)) {
+        _reject("option s must be an integer, but is $opt_s");
+    }
+}