| 80 | 1 #!/usr/bin/env python | 
|  | 2 #Greg Von Kuster | 
|  | 3 | 
|  | 4 import sys | 
|  | 5 #from rpy import * | 
|  | 6 import rpy2.robjects as robjects | 
|  | 7 from rpy2.robjects.packages import importr | 
|  | 8 r = robjects.r | 
|  | 9 grdevices = importr('grDevices') | 
|  | 10 | 
|  | 11 | 
|  | 12 assert sys.version_info[:2] >= ( 2, 4 ) | 
|  | 13 | 
|  | 14 def stop_err(msg): | 
|  | 15     sys.stderr.write(msg) | 
|  | 16     sys.exit() | 
|  | 17 | 
|  | 18 def main(): | 
|  | 19 | 
|  | 20     # Handle input params | 
|  | 21     in_fname = sys.argv[1] | 
|  | 22     out_fname = sys.argv[2] | 
|  | 23     try: | 
|  | 24         column = int( sys.argv[3] ) - 1 | 
|  | 25     except: | 
|  | 26         stop_err( "Column not specified, your query does not contain a column of numerical data." ) | 
|  | 27     title = sys.argv[4] | 
|  | 28     xlab = sys.argv[5] | 
|  | 29     breaks = int( sys.argv[6] ) | 
|  | 30     if breaks == 0: | 
|  | 31         breaks = "Sturges" | 
|  | 32     if sys.argv[7] == "true": | 
|  | 33         density = True | 
|  | 34     else: density = False | 
|  | 35     if len( sys.argv ) >= 9 and sys.argv[8] == "true": | 
|  | 36         frequency = True | 
|  | 37     else: frequency = False | 
|  | 38 | 
|  | 39     matrix = [] | 
|  | 40     skipped_lines = 0 | 
|  | 41     first_invalid_line = 0 | 
|  | 42     invalid_value = '' | 
|  | 43     i = 0 | 
|  | 44     for i, line in enumerate( file( in_fname ) ): | 
|  | 45         valid = True | 
|  | 46         line = line.rstrip('\r\n') | 
|  | 47         # Skip comments | 
|  | 48         if line and not line.startswith( '#' ): | 
|  | 49             # Extract values and convert to floats | 
|  | 50             row = [] | 
|  | 51             try: | 
|  | 52                 fields = line.split( "\t" ) | 
|  | 53                 val = fields[column] | 
|  | 54                 if val.lower() == "na": | 
|  | 55                     row.append( float( "nan" ) ) | 
|  | 56             except: | 
|  | 57                 valid = False | 
|  | 58                 skipped_lines += 1 | 
|  | 59                 if not first_invalid_line: | 
|  | 60                     first_invalid_line = i+1 | 
|  | 61             else: | 
|  | 62                 try: | 
|  | 63                     row.append( float( val ) ) | 
|  | 64                 except ValueError: | 
|  | 65                     valid = False | 
|  | 66                     skipped_lines += 1 | 
|  | 67                     if not first_invalid_line: | 
|  | 68                         first_invalid_line = i+1 | 
|  | 69                         invalid_value = fields[column] | 
|  | 70         else: | 
|  | 71             valid = False | 
|  | 72             skipped_lines += 1 | 
|  | 73             if not first_invalid_line: | 
|  | 74                 first_invalid_line = i+1 | 
|  | 75 | 
|  | 76         if valid: | 
|  | 77             matrix += row | 
|  | 78 | 
|  | 79     if skipped_lines < i: | 
|  | 80         try: | 
|  | 81             #a = r.array( matrix ) | 
|  | 82             fv = robjects.FloatVector(matrix) | 
|  | 83             a = r['matrix'](fv, ncol=1,byrow=True) | 
|  | 84             #r.pdf( out_fname, 8, 8 ) | 
|  | 85             r.pdf( out_fname, 8, 8 ) | 
|  | 86             histogram = r.hist( a, probability=not frequency, main=title, xlab=xlab, breaks=breaks ) | 
|  | 87             if density: | 
|  | 88                 density = r.density( a ) | 
|  | 89                 if frequency: | 
|  | 90                     scale_factor = len( matrix ) * ( histogram['mids'][1] - histogram['mids'][0] ) #uniform bandwidth taken from first 2 midpoints | 
|  | 91                     density[ 'y' ] = map( lambda x: x * scale_factor, density[ 'y' ] ) | 
|  | 92                 r.lines( density ) | 
|  | 93             #r.dev_off() | 
|  | 94             grdevices.dev_off() | 
|  | 95         except Exception, exc: | 
|  | 96             stop_err( "%s" %str( exc ) ) | 
|  | 97     else: | 
|  | 98         if i == 0: | 
|  | 99             stop_err("Input dataset is empty.") | 
|  | 100         else: | 
|  | 101             stop_err( "All values in column %s are non-numeric." %sys.argv[3] ) | 
|  | 102 | 
|  | 103     print "Histogram of column %s. " %sys.argv[3] | 
|  | 104     if skipped_lines > 0: | 
|  | 105         print "Skipped %d invalid lines starting with line #%d, '%s'." % ( skipped_lines, first_invalid_line, invalid_value ) | 
|  | 106 | 
|  | 107     #r.quit( save="no" ) | 
|  | 108 | 
|  | 109 if __name__ == "__main__": | 
|  | 110     main() |