#!/usr/bin/env python
|
|
import argparse
import datetime
import decimal
import re
import shutil
import sys
|
|
8
|
|
# Command-line interface. --data_type selects which input option is read
# ("normals" uses --input_normals; anything else uses --input_actuals).
parser = argparse.ArgumentParser()
parser.add_argument('--data_type', dest='data_type', default=None, help='Temperature data type, normals or actuals')
parser.add_argument('--input_actuals', dest='input_actuals', default=None, help='Daily actuals temperature data')
parser.add_argument('--input_normals', dest='input_normals', default=None, help='30 year normals temperature data')
# NOTE: the original statement ended with a stray trailing comma, which
# wrapped the call result in a discarded one-element tuple.
parser.add_argument('--output', dest='output', help='Output dataset')
args = parser.parse_args()
|
|
15
|
|
# Exact first line required in a daily-actuals input file.
ACTUALS_HEADER = "LATITUDE,LONGITUDE,DATE,DOY,TMIN,TMAX"
# Exact first line required in a 30-year-normals input file.
NORMALS_HEADER = "stationid,latitude,longitude,elev_m,name,st,mmdd,doy,tmin,tmax"
|
|
18
|
|
def add_error_msg(accumulated_msgs, msg):
    """Append *msg* to the accumulated error text on a new line.

    Args:
        accumulated_msgs: Error text collected so far ("" when there is none).
        msg: The error message to append.

    Returns:
        The combined text. A newline is always inserted before *msg*, so the
        result starts with a newline when *accumulated_msgs* is empty.
    """
    return "%s\n%s" % (accumulated_msgs, msg)
|
|
21
|
|
22
|
|
def empty_value(line_no, label, accumulated_msgs):
    """Record that a required value is missing on a line.

    Args:
        line_no: Integer line index reported in the message.
        label: Name of the missing column.
        accumulated_msgs: Error text collected so far.

    Returns:
        *accumulated_msgs* with the "missing value" message appended.
    """
    msg = "The required %s value is missing on line %d." % (label, line_no)
    return add_error_msg(accumulated_msgs, msg)
|
|
25
|
|
26
|
|
def stop_error(msg):
    """Terminate the script, passing *msg* to ``sys.exit``.

    Raises:
        SystemExit: Always; *msg* becomes the exit code/message.
    """
    sys.exit(msg)
|
|
29
|
|
30
|
|
def validate_date_string(line_no, date_string, accumulated_msgs):
    """Check that *date_string* parses with the ``%Y-%m-%d`` format.

    Args:
        line_no: Integer line index reported in the error message.
        date_string: The date text to validate.
        accumulated_msgs: Error text collected so far.

    Returns:
        *accumulated_msgs* unchanged when the date parses, otherwise
        *accumulated_msgs* with a date-format error appended.
    """
    try:
        datetime.datetime.strptime(date_string, '%Y-%m-%d')
    except ValueError:
        return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be YYYY-MM-DD)." % (line_no, date_string))
    else:
        return accumulated_msgs
|
|
37
|
|
38
|
|
def validate_decimal(line_no, decimal_string, accumulated_msgs, label):
    """Check that *decimal_string* can be converted by ``decimal.Decimal``.

    Args:
        line_no: Integer line index reported in the error message.
        decimal_string: The text to validate.
        accumulated_msgs: Error text collected so far.
        label: Column name reported in the error message.

    Returns:
        *accumulated_msgs* unchanged when the conversion succeeds, otherwise
        *accumulated_msgs* with a decimal-value error appended.
    """
    try:
        decimal.Decimal(decimal_string)
    except (decimal.InvalidOperation, TypeError, ValueError):
        # InvalidOperation covers malformed text; TypeError/ValueError cover
        # arguments Decimal() cannot interpret at all.
        return add_error_msg(accumulated_msgs, "Line %d contains an incorrect %s decimal value (%s)." % (line_no, label, decimal_string))
    else:
        return accumulated_msgs
|
|
45
|
|
46
|
|
def validate_integer(line_no, integer_string, accumulated_msgs, label):
    """Check that *integer_string* is a non-empty run of ASCII digits.

    ``str.isdigit()`` is deliberately not used: it also accepts Unicode
    digit characters (for example superscript two) that ``int()`` rejects,
    which would let an unparseable value pass validation.

    Args:
        line_no: Integer line index reported in the error message.
        integer_string: The text to validate.
        accumulated_msgs: Error text collected so far.
        label: Column name reported in the error message.

    Returns:
        *accumulated_msgs* unchanged when the value is valid, otherwise
        *accumulated_msgs* with an integer-value error appended.
    """
    ascii_digits = "0123456789"
    if integer_string and all(ch in ascii_digits for ch in integer_string):
        return accumulated_msgs
    return add_error_msg(accumulated_msgs, "Line %d contains an incorrect %s integer value (%s)." % (line_no, label, integer_string))
|
0
|
51
|
|
52
|
|
def validate_mmdd(line_no, mmdd, accumulated_msgs):
    """Check that *mmdd* is a valid month-day pair in ``mm-dd`` form.

    The value is parsed against a fixed leap year (2000) so that Feb 29 is
    accepted directly. This replaces the previous manual Feb 29 fallback,
    which also accepted malformed values such as "2-29-xyz", and it avoids
    parsing a day of month without a year.

    Args:
        line_no: Integer line index reported in the error message.
        mmdd: The month-day text to validate.
        accumulated_msgs: Error text collected so far.

    Returns:
        *accumulated_msgs* unchanged when the value is valid, otherwise
        *accumulated_msgs* with a date-format error appended.
    """
    try:
        datetime.datetime.strptime("2000-%s" % mmdd, '%Y-%m-%d')
    except ValueError:
        return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be mm-dd)." % (line_no, mmdd))
    else:
        return accumulated_msgs
|
0
|
69
|
|
70
|
|
def _check_doy_sequence(line_no, doy, last_doy, accumulated_msgs):
    """Verify that *doy* directly follows *last_doy* and return the new baseline.

    A *last_doy* of None means no baseline exists yet, so the first parseable
    value is accepted as-is. A value that is not an integer leaves the
    baseline unchanged (the caller reports that error through
    validate_integer). A non-consecutive value stops the script immediately.
    """
    try:
        doy_value = int(doy)
    except ValueError:
        return last_doy
    if last_doy is not None and doy_value != last_doy + 1:
        stop_error(add_error_msg(accumulated_msgs, "Line %d contains a DOY (%s) that is not consecutive." % (line_no, doy)))
    return doy_value


accumulated_msgs = ""
# Parse the input file, checking the header line and validating that each
# data line has the expected number of comma-separated items (10 for
# normals, 6 for actuals) with well-formed values. Recoverable problems are
# accumulated and reported together; structural problems stop immediately.
is_normals = args.data_type == "normals"
if is_normals:
    input_file = args.input_normals
    # Normals must start at day-of-year 1, so the baseline starts at 0.
    last_doy = 0
    num_normals_rows = 0
else:
    input_file = args.input_actuals
    # Actuals may start on any day; the first data row sets the baseline.
    last_doy = None
if input_file is None:
    stop_error("No input file was provided for the selected data type.")
with open(input_file, "r") as ih:
    # NOTE: i is the 0-based index of the line in the file (header is 0) and
    # is the number reported in the "Line %d" messages.
    for i, line in enumerate(ih):
        line = line.rstrip("\r\n")
        items = line.split(",")
        if is_normals:
            # Counts every line, including the header.
            num_normals_rows += 1
            if i == 0:
                if line != NORMALS_HEADER:
                    accumulated_msgs = add_error_msg(accumulated_msgs, "The header is invalid, must be %s" % NORMALS_HEADER)
                continue
            # i == 367 is already the 368th line of the file.
            if i > 366:
                accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 367 lines (must be 1 header line and 366 data lines).")
                stop_error(accumulated_msgs)
            if len(items) != 10:
                accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 10)." % (i, len(items)))
                stop_error(accumulated_msgs)
            stationid = items[0].strip()
            if len(stationid) == 0:
                accumulated_msgs = empty_value(i, "stationid", accumulated_msgs)
            latitude = items[1].strip()
            accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs, "latitude")
            longitude = items[2].strip()
            accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs, "longitude")
            elev_m = items[3].strip()
            accumulated_msgs = validate_decimal(i, elev_m, accumulated_msgs, "elev_m")
            name = items[4].strip()
            if len(name) == 0:
                accumulated_msgs = empty_value(i, "name", accumulated_msgs)
            st = items[5].strip()
            if len(st) == 0:
                accumulated_msgs = empty_value(i, "st", accumulated_msgs)
            mmdd = items[6].strip()
            accumulated_msgs = validate_mmdd(i, mmdd, accumulated_msgs)
            doy = items[7].strip()
            accumulated_msgs = validate_integer(i, doy, accumulated_msgs, "doy")
            # Make sure the DOY values are consecutive.
            last_doy = _check_doy_sequence(i, doy, last_doy, accumulated_msgs)
            tmin = items[8].strip()
            accumulated_msgs = validate_decimal(i, tmin, accumulated_msgs, "tmin")
            tmax = items[9].strip()
            accumulated_msgs = validate_decimal(i, tmax, accumulated_msgs, "tmax")
        else:
            if i == 0:
                if line != ACTUALS_HEADER:
                    accumulated_msgs = add_error_msg(accumulated_msgs, "The header is invalid, must be %s" % ACTUALS_HEADER)
                continue
            # i == 367 is already the 368th line of the file.
            if i > 366:
                accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 367 lines (must be 1 header line and no more than 366 data lines).")
                stop_error(accumulated_msgs)
            if len(items) != 6:
                accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 6)." % (i, len(items)))
                stop_error(accumulated_msgs)
            latitude = items[0].strip()
            accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs, "LATITUDE")
            longitude = items[1].strip()
            accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs, "LONGITUDE")
            date_string = items[2].strip()
            accumulated_msgs = validate_date_string(i, date_string, accumulated_msgs)
            doy = items[3].strip()
            accumulated_msgs = validate_integer(i, doy, accumulated_msgs, "doy")
            # Make sure the DOY values are consecutive. The first data row
            # establishes the baseline (previously the baseline was only set
            # in an unreachable i == 0 branch, so last_doy was undefined).
            last_doy = _check_doy_sequence(i, doy, last_doy, accumulated_msgs)
            tmin = items[4].strip()
            accumulated_msgs = validate_decimal(i, tmin, accumulated_msgs, "tmin")
            tmax = items[5].strip()
            accumulated_msgs = validate_decimal(i, tmax, accumulated_msgs, "tmax")
if is_normals and num_normals_rows != 367:
    accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains %d rows, (must be 367)." % num_normals_rows)

if len(accumulated_msgs) > 0:
    stop_error(accumulated_msgs)

# The file passed validation; hand it on unchanged.
shutil.copyfile(input_file, args.output)
|