diff snpEff_2_1a/scripts/hist.pl @ 0:f8eaa3f8194b default tip

Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
author greg
date Fri, 20 Apr 2012 14:47:09 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/snpEff_2_1a/scripts/hist.pl	Fri Apr 20 14:47:09 2012 -0400
@@ -0,0 +1,88 @@
+#!/usr/bin/perl
+#-------------------------------------------------------------------------------
+#
+# Plot a histogram (using R) 
+# Data is feed as a 1 column of numbers 
+#
+# Note: Any line that does not match a numeric regular expression, is filtered out).
+#
+#														Pablo Cingolani
+#-------------------------------------------------------------------------------
+
+#-------------------------------------------------------------------------------
+# Main
+#-------------------------------------------------------------------------------
+
+# Parse command line option (file base name)
+$base = 'hist';
+if( $ARGV[0] ne '' )	{ $base = $ARGV[0]; }
+
+$pngFile = "$base.png";
+$txtFile = "$base.txt";
+
+# Read STDIN and create an R vector
+open TXT, "> $txtFile" or die "Cannot open output file '$txtFile'\n";
+print TXT "x\n";
+for( $ln = 0 ; $l = <STDIN> ; ) {
+	chomp $l;
+
+	# Does the string contain exactly one number? (can be float)
+	if( $l =~ /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/ ) { print TXT "$l\n"; }
+}
+close TXT;
+
+#---
+# Create an R program, save histogram plot as PNG image
+#---
+
+open R, "| R --vanilla --slave " or die "Cannot open R program\n";
+print R <<EOF;
+
+histDens <- function( x, title, q=1.0, breaks = 30 ) {
+	# Show only this part of the data
+	xmin <- quantile( x, 1-q )
+	xmax <- quantile( x, q )
+    data <- x[ (x >= xmin) & (x <= xmax) ];
+
+    dens <- density(data)
+
+    h <- hist(data, main=title, xlab = "data", ylab = "Frequency", freq = T, breaks=breaks);
+
+    # Adjust density height to 'frecuency'
+	dens\$y <- max(h\$counts) * dens\$y/max(dens\$y)
+    lines(dens, col='red')
+
+    # Mean & median calculated over the whola data
+    abline( v=mean(x), col='blue', lty=2, lwd=2);
+    abline( v=median(x), col='green', lty=2, lwd=2);
+
+	legend("topright",c("Mean","Median"),lty=c(1,1),col=c("blue","green"))
+
+}
+
+png('$pngFile', width = 1024, height = 1024);
+par( mfrow=c(2,1) );
+
+data <- read.csv("$txtFile", sep='\\t', header = TRUE);
+x <- data\$x
+
+histDens( x, "Histogram: All data", 1.0 );
+histDens( x, "Histogram: Quantile [2% - 98%]", 0.98 );
+
+print( summary( x ) )
+
+dev.off();
+quit( save='no' )
+EOF
+
+close R;
+
+#---
+# Show figure
+#---
+
+$os = `uname`;
+$show = "eog"; 
+if( $os =~ "Darwin" )	{ $show = "open"; }
+`$show $pngFile`;
+