Mercurial > repos > greg > validate_affy_metadata
comparison validate_affy_metadata.py @ 25:d9f3bcfeecfe draft default tip
Uploaded
author | greg |
---|---|
date | Thu, 15 Aug 2019 13:15:22 -0400 |
parents | 8a826d1afe69 |
children | |
comparison
equal
deleted
inserted
replaced
24:8a826d1afe69 | 25:d9f3bcfeecfe |
---|---|
35 return 'True' | 35 return 'True' |
36 else: | 36 else: |
37 return 'False' | 37 return 'False' |
38 | 38 |
39 | 39 |
40 def validate_date_string(line_no, date_string, accumulated_msgs): | 40 def validate_date_string(line_no, date_string, column, accumulated_msgs): |
41 if len(date_string) == 0: | 41 if len(date_string) == 0: |
42 return accumulated_msgs | 42 return accumulated_msgs |
43 try: | 43 try: |
44 datetime.datetime.strptime(date_string, '%Y-%m-%d') | 44 datetime.datetime.strptime(date_string, '%Y-%m-%d') |
45 return accumulated_msgs | 45 return accumulated_msgs |
46 except ValueError: | 46 except ValueError: |
47 return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be YYYY-MM-DD)." % (line_no, date_string)) | 47 return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be YYYY-MM-DD) for column %s." % (line_no, date_string, column)) |
48 | 48 |
49 | 49 |
50 def validate_decimal(line_no, decimal_string, accumulated_msgs): | 50 def validate_decimal(line_no, decimal_string, column, accumulated_msgs): |
51 try: | 51 try: |
52 decimal.Decimal(decimal_string) | 52 decimal.Decimal(decimal_string) |
53 return accumulated_msgs | 53 return accumulated_msgs |
54 except Exception: | 54 except Exception: |
55 return add_error_msg(accumulated_msgs, "Line %d contains an incorrect decimal value (%s)." % (line_no, decimal_string)) | 55 return add_error_msg(accumulated_msgs, "Line %d contains an incorrect decimal value (%s) for column %s." % (line_no, decimal_string, column)) |
56 | 56 |
57 | 57 |
58 def validate_email(line_no, email, accumulated_msgs): | 58 def validate_email(line_no, email, accumulated_msgs): |
59 if not (VALID_EMAIL_RE.match(email)): | 59 if not (VALID_EMAIL_RE.match(email)): |
60 return add_error_msg(accumulated_msgs, "Line %d contains an invalid email address (%s). " % (line_no, email)) | 60 return add_error_msg(accumulated_msgs, "Line %d contains an invalid email address (%s). " % (line_no, email)) |
69 with open(args.input, "r") as ih: | 69 with open(args.input, "r") as ih: |
70 for i, line in enumerate(ih): | 70 for i, line in enumerate(ih): |
71 if i == 0: | 71 if i == 0: |
72 # Skip the header. | 72 # Skip the header. |
73 continue | 73 continue |
74 # Keep 1-based line value for error messages. | |
75 line_no = i + 1 | |
74 line = line.rstrip("\r\n") | 76 line = line.rstrip("\r\n") |
75 if i > 97: | 77 if i > 97: |
76 accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 97 lines (must be 1 header line and no more than 96 data lines).") | 78 accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 97 lines (must be 1 header line and no more than 96 data lines).") |
77 stop_error(accumulated_msgs) | 79 stop_error(accumulated_msgs) |
78 items = line.split("\t") | 80 items = line.split("\t") |
79 if len(items) != 32: | 81 if len(items) != 32: |
80 accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 32)." % (i, len(items))) | 82 accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 32)." % (line_no, len(items))) |
81 stop_error(accumulated_msgs) | 83 stop_error(accumulated_msgs) |
82 # Required and validated. | 84 # Required and validated. |
83 # Required. | 85 # Required. |
84 user_specimen_id = items[0] | 86 user_specimen_id = items[0] |
85 if len(user_specimen_id) == 0: | 87 if len(user_specimen_id) == 0: |
86 accumulated_msgs = empty_value(i, "user_specimen_id", accumulated_msgs) | 88 accumulated_msgs = empty_value(line_no, "user_specimen_id", accumulated_msgs) |
87 # Optional. | 89 # Optional. |
88 field_call = items[1] | 90 field_call = items[1] |
89 # Optional. | 91 # Optional. |
90 bcoral_genet_id = items[2] | 92 bcoral_genet_id = items[2] |
91 # Optional. | 93 # Optional. |
92 bsym_genet_id = items[3] | 94 bsym_genet_id = items[3] |
93 # Required. | 95 # Required. |
94 reef = items[4] | 96 reef = items[4] |
95 if len(reef) == 0: | 97 if len(reef) == 0: |
96 accumulated_msgs = empty_value(i, "reef", accumulated_msgs) | 98 accumulated_msgs = empty_value(line_no, "reef", accumulated_msgs) |
97 # Required. | 99 # Required. |
98 region = items[5] | 100 region = items[5] |
99 if len(region) == 0: | 101 if len(region) == 0: |
100 accumulated_msgs = empty_value(i, "region", accumulated_msgs) | 102 accumulated_msgs = empty_value(line_no, "region", accumulated_msgs) |
101 # Required and validated. | 103 # Required and validated. |
102 latitude = items[6] | 104 latitude = items[6] |
103 accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs) | 105 accumulated_msgs = validate_decimal(line_no, latitude, "latitude", accumulated_msgs) |
104 # Required and validated. | 106 # Required and validated. |
105 longitude = items[7] | 107 longitude = items[7] |
106 accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs) | 108 accumulated_msgs = validate_decimal(line_no, longitude, "longitude", accumulated_msgs) |
107 # Optional. | 109 # Optional. |
108 geographic_origin = items[8] | 110 geographic_origin = items[8] |
109 # Optional. | 111 # Optional. |
110 sample_location = items[9] | 112 colony_location = items[9] |
111 # Optional. | 113 # Optional. |
112 latitude_outplant = items[10] | 114 depth = items[10] |
113 # Optional. | 115 # Optional. |
114 longitude_outplant = items[11] | 116 disease_resist = items[11] |
115 # Optional. | 117 # Optional. |
116 depth = items[12] | 118 bleach_resist = items[12] |
117 # Optional. | 119 # Optional. |
118 disease_resist = items[13] | 120 mortality = items[13] |
119 # Optional. | 121 # Optional. |
120 bleach_resist = items[14] | 122 tle = items[14] |
121 # Optional. | 123 # Optional. |
122 mortality = items[15] | 124 spawning = string_as_boolean_string(items[15]) |
125 # Required. | |
126 collector_last_name = items[16] | |
127 if len(collector_last_name) == 0: | |
128 accumulated_msgs = empty_value(line_no, "collector_last_name", accumulated_msgs) | |
129 # Required. | |
130 collector_first_name = items[17] | |
131 if len(collector_first_name) == 0: | |
132 accumulated_msgs = empty_value(line_no, "collector_first_name", accumulated_msgs) | |
133 # Required. | |
134 org = items[18] | |
135 if len(org) == 0: | |
136 accumulated_msgs = empty_value(line_no, "org", accumulated_msgs) | |
137 # Required and validated. | |
138 collection_date = items[19] | |
139 accumulated_msgs = validate_date_string(line_no, collection_date, "collection_date", accumulated_msgs) | |
140 # Required and validated. | |
141 contact_email = items[20] | |
142 accumulated_msgs = validate_email(line_no, contact_email, accumulated_msgs) | |
143 # Required. | |
144 seq_facility = items[21] | |
145 if len(seq_facility) == 0: | |
146 accumulated_msgs = empty_value(line_no, "seq_facility", accumulated_msgs) | |
123 # Optional. | 147 # Optional. |
124 tle = items[16] | 148 array_version = items[22] |
125 # Optional. | 149 # Optional. |
126 spawning = string_as_boolean_string(items[17]) | 150 public = string_as_boolean_string(items[23]) |
127 # Required. | 151 # Optional. |
128 collector_last_name = items[18] | 152 public_after_date = items[24] |
129 if len(collector_last_name) == 0: | 153 accumulated_msga = validate_date_string(line_no, public_after_date, "public_after_date", accumulated_msgs) |
130 accumulated_msgs = empty_value(i, "collector_last_name", accumulated_msgs) | |
131 # Required. | |
132 collector_first_name = items[19] | |
133 if len(collector_first_name) == 0: | |
134 accumulated_msgs = empty_value(i, "collector_first_name", accumulated_msgs) | |
135 # Required. | |
136 org = items[20] | |
137 if len(org) == 0: | |
138 accumulated_msgs = empty_value(i, "org", accumulated_msgs) | |
139 # Required and validated. | 154 # Required and validated. |
140 collection_date = items[21] | 155 sperm_motility = items[25] |
141 accumulated_msgs = validate_date_string(i, collection_date, accumulated_msgs) | 156 accumulated_msgs = validate_decimal(line_no, sperm_motility, "sperm_motility", accumulated_msgs) |
142 # Required and validated. | 157 # Required and validated. |
143 contact_email = items[22] | 158 healing_time = items[26] |
144 accumulated_msgs = validate_email(i, contact_email, accumulated_msgs) | 159 accumulated_msgs = validate_decimal(line_no, healing_time, "healing_time", accumulated_msgs) |
145 # Required. | |
146 seq_facility = items[23] | |
147 if len(seq_facility) == 0: | |
148 accumulated_msgs = empty_value(i, "seq_facility", accumulated_msgs) | |
149 # Optional. | 160 # Optional. |
150 array_version = items[24] | 161 dna_extraction_method = items[27] |
151 # Optional. | 162 # Optional. |
152 public = string_as_boolean_string(items[25]) | 163 dna_concentration = items[28] |
153 # Optional. | |
154 public_after_date = items[26] | |
155 accumulated_msga = validate_date_string(i, public_after_date, accumulated_msgs) | |
156 # Required and validated. | |
157 sperm_motility = items[27] | |
158 accumulated_msgs = validate_decimal(i, sperm_motility, accumulated_msgs) | |
159 # Required and validated. | |
160 healing_time = items[28] | |
161 accumulated_msgs = validate_decimal(i, healing_time, accumulated_msgs) | |
162 # Optional. | |
163 dna_extraction_method = items[29] | |
164 # Optional. | |
165 dna_concentration = items[30] | |
166 # If dna_concentration has a value, then it must be decimal. | 164 # If dna_concentration has a value, then it must be decimal. |
167 if len(dna_concentration) > 0: | 165 if len(dna_concentration) > 0: |
168 accumulated_msgs = validate_decimal(i, dna_concentration, accumulated_msgs) | 166 accumulated_msgs = validate_decimal(line_no, dna_concentration, "dna_concentration", accumulated_msgs) |
169 # Optional. | 167 # Optional. |
170 registry_id = items[31] | 168 registry_id = items[29] |
171 | 169 # Optional. |
170 result_folder_name = items[30] | |
171 # Optional. | |
172 plate_barcode = items[31] | |
173 | |
172 | 174 |
173 if len(accumulated_msgs) > 0: | 175 if len(accumulated_msgs) > 0: |
174 stop_error(accumulated_msgs) | 176 stop_error(accumulated_msgs) |
175 | 177 |
176 shutil.copyfile(args.input, args.output) | 178 shutil.copyfile(args.input, args.output) |