annotate validate_temperature_data.py @ 5:467305a51829 draft

Uploaded
author greg
date Tue, 27 Nov 2018 09:57:15 -0500
parents 9ca3ff390666
children 418a11822c5a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4290854f3af5 Uploaded
greg
parents:
diff changeset
1 #!/usr/bin/env python
4290854f3af5 Uploaded
greg
parents:
diff changeset
2 import argparse
4290854f3af5 Uploaded
greg
parents:
diff changeset
3 import datetime
4290854f3af5 Uploaded
greg
parents:
diff changeset
4 import decimal
4290854f3af5 Uploaded
greg
parents:
diff changeset
5 import re
4290854f3af5 Uploaded
greg
parents:
diff changeset
6 import shutil
4290854f3af5 Uploaded
greg
parents:
diff changeset
7 import sys
4290854f3af5 Uploaded
greg
parents:
diff changeset
8
4290854f3af5 Uploaded
greg
parents:
diff changeset
# Command line interface: the kind of temperature data to validate, the input
# dataset for each kind, and the path the validated dataset is copied to.
# Each option is stored under the attribute derived from its long flag and
# is None when the flag is not given.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--data_type',
    help='Temperature data type, normals or actuals',
)
parser.add_argument(
    '--input_actuals',
    help='Daily actuals temperature data',
)
parser.add_argument(
    '--input_normals',
    help='30 year normals temperature data',
)
parser.add_argument(
    '--output',
    help='Output dataset',
)
args = parser.parse_args()

# Exact text the first line of each kind of input file is compared against.
ACTUALS_HEADER = "LATITUDE,LONGITUDE,DATE,DOY,TMIN,TMAX"
NORMALS_HEADER = "stationid,latitude,longitude,elev_m,name,st,mmdd,doy,tmin,tmax"
4290854f3af5 Uploaded
greg
parents:
diff changeset
18
4290854f3af5 Uploaded
greg
parents:
diff changeset
def add_error_msg(accumulated_msgs, msg):
    """Return accumulated_msgs with msg appended after a newline.

    The newline separator is always inserted, so appending to an empty
    accumulator produces a string that begins with a newline.
    """
    return "{0}\n{1}".format(accumulated_msgs, msg)
4290854f3af5 Uploaded
greg
parents:
diff changeset
21
4290854f3af5 Uploaded
greg
parents:
diff changeset
22
4290854f3af5 Uploaded
greg
parents:
diff changeset
def empty_value(line_no, label, accumulated_msgs):
    """Return accumulated_msgs extended with a missing-value error.

    line_no is the line number reported in the message and label names the
    required column whose value is empty.
    """
    missing_msg = "The required %s value is missing on line %d." % (label, line_no)
    return add_error_msg(accumulated_msgs, missing_msg)
4290854f3af5 Uploaded
greg
parents:
diff changeset
25
4290854f3af5 Uploaded
greg
parents:
diff changeset
26
4290854f3af5 Uploaded
greg
parents:
diff changeset
def stop_error(msg):
    """Abort the script by raising SystemExit carrying msg."""
    raise SystemExit(msg)
4290854f3af5 Uploaded
greg
parents:
diff changeset
29
4290854f3af5 Uploaded
greg
parents:
diff changeset
30
4290854f3af5 Uploaded
greg
parents:
diff changeset
def validate_date_string(line_no, date_string, accumulated_msgs):
    """Check that date_string parses with the '%Y-%m-%d' format.

    Returns accumulated_msgs unchanged when the value parses; otherwise
    returns it with an error mentioning line_no and the offending value.
    """
    try:
        datetime.datetime.strptime(date_string, '%Y-%m-%d')
    except ValueError:
        bad_date_msg = "Line %d contains an incorrect date format (%s must be YYYY-MM-DD)." % (line_no, date_string)
        return add_error_msg(accumulated_msgs, bad_date_msg)
    return accumulated_msgs
4290854f3af5 Uploaded
greg
parents:
diff changeset
37
4290854f3af5 Uploaded
greg
parents:
diff changeset
38
4290854f3af5 Uploaded
greg
parents:
diff changeset
def validate_decimal(line_no, decimal_string, accumulated_msgs, label):
    """Check that decimal_string can be converted by decimal.Decimal.

    Returns accumulated_msgs unchanged when the conversion succeeds;
    otherwise returns it with an error naming label, line_no and the value.

    NOTE(review): decimal.Decimal also accepts strings such as "NaN" and
    "Infinity" - confirm whether those should count as valid values here.
    """
    try:
        decimal.Decimal(decimal_string)
    except Exception:
        bad_decimal_msg = "Line %d contains an incorrect %s decimal value (%s)." % (line_no, label, decimal_string)
        return add_error_msg(accumulated_msgs, bad_decimal_msg)
    return accumulated_msgs
4290854f3af5 Uploaded
greg
parents:
diff changeset
45
4290854f3af5 Uploaded
greg
parents:
diff changeset
46
4290854f3af5 Uploaded
greg
parents:
diff changeset
def validate_integer(line_no, integer_string, accumulated_msgs, label):
    """Check that integer_string is a non-empty run of ASCII digits 0-9.

    Returns accumulated_msgs unchanged when the value is valid; otherwise
    returns it with an error naming label, line_no and the value.

    Only the characters 0-9 are accepted.  str.isdigit() alone is also true
    for characters such as superscript digits that int() cannot parse, which
    would let a value pass validation here and then fail when converted.
    """
    ascii_digits = "0123456789"
    if integer_string and all(ch in ascii_digits for ch in integer_string):
        return accumulated_msgs
    return add_error_msg(accumulated_msgs, "Line %d contains an incorrect %s integer value (%s)." % (line_no, label, integer_string))
0
4290854f3af5 Uploaded
greg
parents:
diff changeset
51
4290854f3af5 Uploaded
greg
parents:
diff changeset
52
4290854f3af5 Uploaded
greg
parents:
diff changeset
def validate_mmdd(line_no, mmdd, accumulated_msgs):
    """Check that mmdd is a month-day value in mm-dd form.

    Returns accumulated_msgs unchanged when the value parses; otherwise
    returns it with an error mentioning line_no and the offending value.

    The value is parsed together with a fixed leap year.  Parsing '%m-%d'
    on its own makes strptime assume the year 1900, which is not a leap
    year, so the legitimate value "02-29" would be rejected.
    """
    try:
        datetime.datetime.strptime("2000-" + mmdd, '%Y-%m-%d')
    except ValueError:
        return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be mm-dd)." % (line_no, mmdd))
    return accumulated_msgs
0
4290854f3af5 Uploaded
greg
parents:
diff changeset
59
4290854f3af5 Uploaded
greg
parents:
diff changeset
60
4290854f3af5 Uploaded
greg
parents:
diff changeset
# Validate the selected input file line by line.  Field-level problems are
# collected in accumulated_msgs so that they can be reported together;
# structural problems (too many lines, wrong column count, non-consecutive
# DOY) stop the script immediately.  The input is copied to the output path
# only when no error was recorded.
accumulated_msgs = ""
# Both data types allow one header line followed by at most 366 data lines.
MAX_DATA_ROWS = 366
is_normals = args.data_type == "normals"
if is_normals:
    input_file = args.input_normals
    header = NORMALS_HEADER
    num_columns = 10
    too_many_lines_msg = "The input file contains more than 367 lines (must be 1 header line and 366 data lines)."
    # Normals must start at DOY 1, so the first row is compared against 0 + 1.
    last_doy = 0
else:
    input_file = args.input_actuals
    header = ACTUALS_HEADER
    num_columns = 6
    too_many_lines_msg = "The input file contains more than 367 lines (must be 1 header line and no more than 366 data lines)."
    # Actuals may start on any day; the first valid DOY seeds the sequence.
    last_doy = None
num_normals_rows = 0

if input_file is None:
    # Without this check open() would fail with an unhelpful traceback.
    stop_error("No input dataset was provided for the selected data type (use --input_normals or --input_actuals).")

with open(input_file, "r") as ih:
    for i, line in enumerate(ih):
        line = line.rstrip("\r\n")
        if i == 0:
            # The first line must match the expected header exactly.
            if line != header:
                accumulated_msgs = add_error_msg(accumulated_msgs, "The header is invalid, must be %s" % header)
            continue
        # i is zero-based with the header at 0, so i is also the number of
        # data lines read so far.
        if i > MAX_DATA_ROWS:
            stop_error(add_error_msg(accumulated_msgs, too_many_lines_msg))
        items = line.split(",")
        if len(items) != num_columns:
            stop_error(add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be %d)." % (i, len(items), num_columns)))
        if is_normals:
            stationid, latitude, longitude, elev_m, name, st, mmdd, doy, tmin, tmax = items
            if len(stationid) == 0:
                accumulated_msgs = empty_value(i, "stationid", accumulated_msgs)
            accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs, "latitude")
            accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs, "longitude")
            accumulated_msgs = validate_decimal(i, elev_m, accumulated_msgs, "elev_m")
            if len(name) == 0:
                accumulated_msgs = empty_value(i, "name", accumulated_msgs)
            if len(st) == 0:
                accumulated_msgs = empty_value(i, "st", accumulated_msgs)
            accumulated_msgs = validate_mmdd(i, mmdd, accumulated_msgs)
        else:
            latitude, longitude, date_string, doy, tmin, tmax = items
            accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs, "LATITUDE")
            accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs, "LONGITUDE")
            accumulated_msgs = validate_date_string(i, date_string, accumulated_msgs)
        accumulated_msgs = validate_integer(i, doy, accumulated_msgs, "doy")
        # Make sure the DOY values are consecutive.
        try:
            doy_value = int(doy)
        except ValueError:
            # Reporting a non-integer DOY is the job of validate_integer()
            # above; the sequence check is simply skipped for this row.
            doy_value = None
        if doy_value is not None:
            if last_doy is not None and doy_value != last_doy + 1:
                stop_error(add_error_msg(accumulated_msgs, "Line %d contains a DOY (%s) that is not consecutive." % (i, doy)))
            last_doy = doy_value
        accumulated_msgs = validate_decimal(i, tmin, accumulated_msgs, "tmin")
        accumulated_msgs = validate_decimal(i, tmax, accumulated_msgs, "tmax")
        if is_normals:
            num_normals_rows += 1

# The header line is not counted, so a complete normals file has exactly
# 366 data rows.
if is_normals and num_normals_rows != MAX_DATA_ROWS:
    accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains %d data rows, (must be %d)." % (num_normals_rows, MAX_DATA_ROWS))

if len(accumulated_msgs) > 0:
    stop_error(accumulated_msgs)

shutil.copyfile(input_file, args.output)