Mercurial > repos > bgruening > upload_testing
comparison histogram.py @ 90:b061185bcb83 draft
Uploaded
| author | bernhardlutz |
|---|---|
| date | Thu, 23 Jan 2014 14:53:46 -0500 |
| parents | c4a3a8999945 |
| children |
comparison
equal
deleted
inserted
replaced
| 89:71e953a4191d | 90:b061185bcb83 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 #Greg Von Kuster | |
| 3 | |
| 4 import sys | |
| 5 #from rpy import * | |
| 6 import rpy2.robjects as robjects | |
| 7 from rpy2.robjects.packages import importr | |
| 8 r = robjects.r | |
| 9 grdevices = importr('grDevices') | |
| 10 | |
| 11 | |
| 12 assert sys.version_info[:2] >= ( 2, 4 ) | |
| 13 | |
| 14 def stop_err(msg): | |
| 15 sys.stderr.write(msg) | |
| 16 sys.exit() | |
| 17 | |
| 18 def main(): | |
| 19 | |
| 20 # Handle input params | |
| 21 in_fname = sys.argv[1] | |
| 22 out_fname = sys.argv[2] | |
| 23 try: | |
| 24 column = int( sys.argv[3] ) - 1 | |
| 25 except: | |
| 26 stop_err( "Column not specified, your query does not contain a column of numerical data." ) | |
| 27 title = sys.argv[4] | |
| 28 xlab = sys.argv[5] | |
| 29 breaks = int( sys.argv[6] ) | |
| 30 if breaks == 0: | |
| 31 breaks = "Sturges" | |
| 32 if sys.argv[7] == "true": | |
| 33 density = True | |
| 34 else: density = False | |
| 35 if len( sys.argv ) >= 9 and sys.argv[8] == "true": | |
| 36 frequency = True | |
| 37 else: frequency = False | |
| 38 | |
| 39 matrix = [] | |
| 40 skipped_lines = 0 | |
| 41 first_invalid_line = 0 | |
| 42 invalid_value = '' | |
| 43 i = 0 | |
| 44 for i, line in enumerate( file( in_fname ) ): | |
| 45 valid = True | |
| 46 line = line.rstrip('\r\n') | |
| 47 # Skip comments | |
| 48 if line and not line.startswith( '#' ): | |
| 49 # Extract values and convert to floats | |
| 50 row = [] | |
| 51 try: | |
| 52 fields = line.split( "\t" ) | |
| 53 val = fields[column] | |
| 54 if val.lower() == "na": | |
| 55 row.append( float( "nan" ) ) | |
| 56 except: | |
| 57 valid = False | |
| 58 skipped_lines += 1 | |
| 59 if not first_invalid_line: | |
| 60 first_invalid_line = i+1 | |
| 61 else: | |
| 62 try: | |
| 63 row.append( float( val ) ) | |
| 64 except ValueError: | |
| 65 valid = False | |
| 66 skipped_lines += 1 | |
| 67 if not first_invalid_line: | |
| 68 first_invalid_line = i+1 | |
| 69 invalid_value = fields[column] | |
| 70 else: | |
| 71 valid = False | |
| 72 skipped_lines += 1 | |
| 73 if not first_invalid_line: | |
| 74 first_invalid_line = i+1 | |
| 75 | |
| 76 if valid: | |
| 77 matrix += row | |
| 78 | |
| 79 if skipped_lines < i: | |
| 80 try: | |
| 81 #a = r.array( matrix ) | |
| 82 fv = robjects.FloatVector(matrix) | |
| 83 a = r['matrix'](fv, ncol=1,byrow=True) | |
| 84 #r.pdf( out_fname, 8, 8 ) | |
| 85 r.pdf( out_fname, 8, 8 ) | |
| 86 histogram = r.hist( a, probability=not frequency, main=title, xlab=xlab, breaks=breaks ) | |
| 87 if density: | |
| 88 density = r.density( a ) | |
| 89 if frequency: | |
| 90 scale_factor = len( matrix ) * ( histogram['mids'][1] - histogram['mids'][0] ) #uniform bandwidth taken from first 2 midpoints | |
| 91 density[ 'y' ] = map( lambda x: x * scale_factor, density[ 'y' ] ) | |
| 92 r.lines( density ) | |
| 93 #r.dev_off() | |
| 94 grdevices.dev_off() | |
| 95 except Exception, exc: | |
| 96 stop_err( "%s" %str( exc ) ) | |
| 97 else: | |
| 98 if i == 0: | |
| 99 stop_err("Input dataset is empty.") | |
| 100 else: | |
| 101 stop_err( "All values in column %s are non-numeric." %sys.argv[3] ) | |
| 102 | |
| 103 print "Histogram of column %s. " %sys.argv[3] | |
| 104 if skipped_lines > 0: | |
| 105 print "Skipped %d invalid lines starting with line #%d, '%s'." % ( skipped_lines, first_invalid_line, invalid_value ) | |
| 106 | |
| 107 #r.quit( save="no" ) | |
| 108 | |
| 109 if __name__ == "__main__": | |
| 110 main() |
