comparison validate_temperature_data.py @ 0:4290854f3af5 draft

Uploaded
author greg
date Tue, 27 Nov 2018 09:47:31 -0500
parents
children cfe1ce427aa7
comparison
equal deleted inserted replaced
-1:000000000000 0:4290854f3af5
1 #!/usr/bin/env python
2 import argparse
3 import datetime
4 import decimal
5 import re
6 import shutil
7 import sys
8
# Command-line interface.  Each option's destination attribute is derived by
# argparse from its long flag name, and every option defaults to None.
parser = argparse.ArgumentParser()
parser.add_argument('--data_type', help='Temperature data type, normals or actuals')
parser.add_argument('--input_actuals', help='Daily actuals temperature data')
parser.add_argument('--input_normals', help='30 year normals temperature data')
parser.add_argument('--output', help='Output dataset')
args = parser.parse_args()
15
# Exact header line required in a daily-actuals file.
ACTUALS_HEADER = ",".join((
    "LATITUDE",
    "LONGITUDE",
    "DATE",
    "DOY",
    "TMIN",
    "TMAX",
))
# Exact header line required in a 30 year normals file.
NORMALS_HEADER = ",".join((
    "stationid",
    "latitude",
    "longitude",
    "elev_m",
    "name",
    "st",
    "mmdd",
    "doy",
    "tmin",
    "tmax",
))
18
def add_error_msg(accumulated_msgs, msg):
    """Return accumulated_msgs with msg appended on a new line.

    A newline is always inserted before msg, even when accumulated_msgs
    is empty, so the first recorded message starts with a line break.
    """
    return "{0}\n{1}".format(accumulated_msgs, msg)
21
22
def empty_value(line_no, label, accumulated_msgs):
    """Record that the required value named label is empty on line line_no.

    Returns accumulated_msgs extended, via add_error_msg, with the
    "missing value" message.
    """
    missing_msg = "The required %s value is missing on line %d." % (label, line_no)
    return add_error_msg(accumulated_msgs, missing_msg)
25
26
def stop_error(msg):
    """Abort the script, handing msg to the interpreter as the exit value.

    Raises SystemExit(msg), which is exactly what sys.exit(msg) does.
    """
    raise SystemExit(msg)
29
30
def validate_date_string(line_no, date_string, accumulated_msgs):
    """Check that date_string parses as a YYYY-MM-DD date.

    Returns accumulated_msgs unchanged when the date parses; otherwise
    returns it extended, via add_error_msg, with a message naming line_no
    and the offending value.
    """
    try:
        datetime.datetime.strptime(date_string, '%Y-%m-%d')
    except ValueError:
        problem = "Line %d contains an incorrect date format (%s must be YYYY-MM-DD)." % (line_no, date_string)
        return add_error_msg(accumulated_msgs, problem)
    return accumulated_msgs
37
38
def validate_decimal(line_no, decimal_string, accumulated_msgs, label):
    """Check that decimal_string can be converted by decimal.Decimal.

    Returns accumulated_msgs unchanged when the conversion succeeds;
    otherwise returns it extended, via add_error_msg, with a message naming
    line_no, label and the offending value.
    """
    try:
        decimal.Decimal(decimal_string)
    except Exception:
        # Any conversion failure is reported the same way.
        problem = "Line %d contains an incorrect %s decimal value (%s)." % (line_no, label, decimal_string)
        return add_error_msg(accumulated_msgs, problem)
    return accumulated_msgs
45
46
def validate_integer(line_no, integer_string, accumulated_msgs, label):
    """Check that integer_string consists solely of digits.

    Returns accumulated_msgs unchanged when integer_string.isdigit() is
    true (so signs, whitespace and the empty string are rejected);
    otherwise returns it extended, via add_error_msg, with a message naming
    line_no, label and the offending value.
    """
    if integer_string.isdigit():
        return accumulated_msgs
    # Report the value that was actually checked.  The previous code
    # referenced an undefined name here and raised NameError.
    return add_error_msg(accumulated_msgs, "Line %d contains an incorrect %s integer value (%s)." % (line_no, label, integer_string))
51
52
def validate_mmdd(line_no, mmdd, accumulated_msgs):
    """Check that mmdd is a valid month-day string in mm-dd form.

    Returns accumulated_msgs unchanged when mmdd parses; otherwise returns
    it extended, via add_error_msg, with a message naming line_no and the
    offending value.
    """
    try:
        # Parse against a fixed leap year so that 02-29 is accepted.  A bare
        # '%m-%d' format defaults the year to 1900, which is not a leap
        # year, and would reject the leap day.
        datetime.datetime.strptime("2000-%s" % mmdd, '%Y-%m-%d')
    except ValueError:
        return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be mm-dd)." % (line_no, mmdd))
    return accumulated_msgs
59
60
# Validate the selected input file and, when it is clean, copy it to the
# output location.
#
# The first line must equal the expected header exactly and is not validated
# as data.  Every following line must have the expected number of
# comma-separated items (10 for normals, 6 for actuals).  Line numbers in
# messages are the zero-based index of the line within the file, so the
# header is line 0 and the first data row is line 1.
accumulated_msgs = ""
is_normals = args.data_type == "normals"
if is_normals:
    input_file = args.input_normals
    header = NORMALS_HEADER
    num_columns = 10
    # Normals DOY values must start at 1, so seed the sequence with 0.
    last_doy = 0
    # Number of lines read, header included.
    num_normals_rows = 0
else:
    # Any data type other than "normals" is handled as actuals.
    input_file = args.input_actuals
    header = ACTUALS_HEADER
    num_columns = 6
    # Actuals may start on any DOY; the first valid data row seeds the sequence.
    last_doy = None
header_seen = False
with open(input_file, "r") as ih:
    for i, line in enumerate(ih):
        line = line.rstrip("\r\n")
        if is_normals:
            num_normals_rows += 1
        if i == 0:
            header_seen = True
            if line != header:
                accumulated_msgs = add_error_msg(accumulated_msgs, "The header is invalid, must be %s" % header)
            # The header is not a data row.
            continue
        if i > 366:
            # Index 367 is the 368th line, i.e. more than 1 header + 366 data lines.
            if is_normals:
                accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 367 lines (must be 1 header line and 366 data lines).")
            else:
                accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 367 lines (must be 1 header line and no more than 366 data lines).")
            stop_error(accumulated_msgs)
        items = line.split(",")
        if len(items) != num_columns:
            # The columns cannot be mapped reliably, so stop immediately.
            accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be %d)." % (i, len(items), num_columns))
            stop_error(accumulated_msgs)
        if is_normals:
            stationid, latitude, longitude, elev_m, name, st, mmdd, doy, tmin, tmax = items
            if len(stationid) == 0:
                accumulated_msgs = empty_value(i, "stationid", accumulated_msgs)
            accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs, "latitude")
            accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs, "longitude")
            accumulated_msgs = validate_decimal(i, elev_m, accumulated_msgs, "elev_m")
            if len(name) == 0:
                accumulated_msgs = empty_value(i, "name", accumulated_msgs)
            if len(st) == 0:
                accumulated_msgs = empty_value(i, "st", accumulated_msgs)
            accumulated_msgs = validate_mmdd(i, mmdd, accumulated_msgs)
        else:
            latitude, longitude, date_string, doy, tmin, tmax = items
            accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs, "LATITUDE")
            accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs, "LONGITUDE")
            accumulated_msgs = validate_date_string(i, date_string, accumulated_msgs)
        accumulated_msgs = validate_integer(i, doy, accumulated_msgs, "doy")
        # Make sure the DOY values are consecutive.
        try:
            doy_value = int(doy)
        except ValueError:
            # The error for an invalid integer was captured above.
            doy_value = None
        if doy_value is not None:
            if last_doy is not None and doy_value != last_doy + 1:
                accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains a DOY (%s) that is not consecutive." % (i, doy))
                stop_error(accumulated_msgs)
            last_doy = doy_value
        accumulated_msgs = validate_decimal(i, tmin, accumulated_msgs, "tmin")
        accumulated_msgs = validate_decimal(i, tmax, accumulated_msgs, "tmax")
if not header_seen:
    # An empty file has no header line at all.
    accumulated_msgs = add_error_msg(accumulated_msgs, "The header is invalid, must be %s" % header)
if is_normals and num_normals_rows != 367:
    accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains %d rows, (must be 367)." % num_normals_rows)

if len(accumulated_msgs) > 0:
    stop_error(accumulated_msgs)

# Validation passed: the output dataset is a copy of the input.
shutil.copyfile(input_file, args.output)