Mercurial > repos > greg > validate_affy_metadata
diff validate_affy_metadata.py @ 25:d9f3bcfeecfe draft default tip
Uploaded
| author | greg |
|---|---|
| date | Thu, 15 Aug 2019 13:15:22 -0400 |
| parents | 8a826d1afe69 |
| children | |
line wrap: on
line diff
--- a/validate_affy_metadata.py Tue Apr 23 08:57:13 2019 -0400 +++ b/validate_affy_metadata.py Thu Aug 15 13:15:22 2019 -0400 @@ -37,22 +37,22 @@ return 'False' -def validate_date_string(line_no, date_string, accumulated_msgs): +def validate_date_string(line_no, date_string, column, accumulated_msgs): if len(date_string) == 0: return accumulated_msgs try: datetime.datetime.strptime(date_string, '%Y-%m-%d') return accumulated_msgs except ValueError: - return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be YYYY-MM-DD)." % (line_no, date_string)) + return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be YYYY-MM-DD) for column %s." % (line_no, date_string, column)) -def validate_decimal(line_no, decimal_string, accumulated_msgs): +def validate_decimal(line_no, decimal_string, column, accumulated_msgs): try: decimal.Decimal(decimal_string) return accumulated_msgs except Exception: - return add_error_msg(accumulated_msgs, "Line %d contains an incorrect decimal value (%s)." % (line_no, decimal_string)) + return add_error_msg(accumulated_msgs, "Line %d contains an incorrect decimal value (%s) for column %s." % (line_no, decimal_string, column)) def validate_email(line_no, email, accumulated_msgs): @@ -71,19 +71,21 @@ if i == 0: # Skip the header. continue + # Keep 1-based line value for error messages. + line_no = i + 1 line = line.rstrip("\r\n") if i > 97: accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 97 lines (must be 1 header line and no more than 96 data lines).") stop_error(accumulated_msgs) items = line.split("\t") if len(items) != 32: - accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 32)." % (i, len(items))) + accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 32)." % (line_no, len(items))) stop_error(accumulated_msgs) # Required and validated. # Required. 
user_specimen_id = items[0] if len(user_specimen_id) == 0: - accumulated_msgs = empty_value(i, "user_specimen_id", accumulated_msgs) + accumulated_msgs = empty_value(line_no, "user_specimen_id", accumulated_msgs) # Optional. field_call = items[1] # Optional. @@ -93,82 +95,82 @@ # Required. reef = items[4] if len(reef) == 0: - accumulated_msgs = empty_value(i, "reef", accumulated_msgs) + accumulated_msgs = empty_value(line_no, "reef", accumulated_msgs) # Required. region = items[5] if len(region) == 0: - accumulated_msgs = empty_value(i, "region", accumulated_msgs) + accumulated_msgs = empty_value(line_no, "region", accumulated_msgs) # Required and validated. latitude = items[6] - accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs) + accumulated_msgs = validate_decimal(line_no, latitude, "latitude", accumulated_msgs) # Required and validated. longitude = items[7] - accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs) + accumulated_msgs = validate_decimal(line_no, longitude, "longitude", accumulated_msgs) # Optional. geographic_origin = items[8] # Optional. - sample_location = items[9] + colony_location = items[9] # Optional. - latitude_outplant = items[10] + depth = items[10] # Optional. - longitude_outplant = items[11] + disease_resist = items[11] # Optional. - depth = items[12] - # Optional. - disease_resist = items[13] + bleach_resist = items[12] # Optional. - bleach_resist = items[14] + mortality = items[13] # Optional. - mortality = items[15] + tle = items[14] # Optional. - tle = items[16] - # Optional. - spawning = string_as_boolean_string(items[17]) + spawning = string_as_boolean_string(items[15]) # Required. - collector_last_name = items[18] + collector_last_name = items[16] if len(collector_last_name) == 0: - accumulated_msgs = empty_value(i, "collector_last_name", accumulated_msgs) + accumulated_msgs = empty_value(line_no, "collector_last_name", accumulated_msgs) # Required. 
- collector_first_name = items[19] + collector_first_name = items[17] if len(collector_first_name) == 0: - accumulated_msgs = empty_value(i, "collector_first_name", accumulated_msgs) + accumulated_msgs = empty_value(line_no, "collector_first_name", accumulated_msgs) # Required. - org = items[20] + org = items[18] if len(org) == 0: - accumulated_msgs = empty_value(i, "org", accumulated_msgs) + accumulated_msgs = empty_value(line_no, "org", accumulated_msgs) # Required and validated. - collection_date = items[21] - accumulated_msgs = validate_date_string(i, collection_date, accumulated_msgs) + collection_date = items[19] + accumulated_msgs = validate_date_string(line_no, collection_date, "collection_date", accumulated_msgs) # Required and validated. - contact_email = items[22] - accumulated_msgs = validate_email(i, contact_email, accumulated_msgs) + contact_email = items[20] + accumulated_msgs = validate_email(line_no, contact_email, accumulated_msgs) # Required. - seq_facility = items[23] + seq_facility = items[21] if len(seq_facility) == 0: - accumulated_msgs = empty_value(i, "seq_facility", accumulated_msgs) + accumulated_msgs = empty_value(line_no, "seq_facility", accumulated_msgs) # Optional. - array_version = items[24] + array_version = items[22] # Optional. - public = string_as_boolean_string(items[25]) + public = string_as_boolean_string(items[23]) # Optional. - public_after_date = items[26] - accumulated_msga = validate_date_string(i, public_after_date, accumulated_msgs) + public_after_date = items[24] + accumulated_msga = validate_date_string(line_no, public_after_date, "public_after_date", accumulated_msgs) # Required and validated. - sperm_motility = items[27] - accumulated_msgs = validate_decimal(i, sperm_motility, accumulated_msgs) + sperm_motility = items[25] + accumulated_msgs = validate_decimal(line_no, sperm_motility, "sperm_motility", accumulated_msgs) # Required and validated. 
- healing_time = items[28] - accumulated_msgs = validate_decimal(i, healing_time, accumulated_msgs) + healing_time = items[26] + accumulated_msgs = validate_decimal(line_no, healing_time, "healing_time", accumulated_msgs) # Optional. - dna_extraction_method = items[29] + dna_extraction_method = items[27] # Optional. - dna_concentration = items[30] + dna_concentration = items[28] # If dna_concentration has a value, then it must be decimal. if len(dna_concentration) > 0: - accumulated_msgs = validate_decimal(i, dna_concentration, accumulated_msgs) + accumulated_msgs = validate_decimal(line_no, dna_concentration, "dna_concentration", accumulated_msgs) + # Optional. + registry_id = items[29] # Optional. - registry_id = items[31] - + result_folder_name = items[30] + # Optional. + plate_barcode = items[31] + if len(accumulated_msgs) > 0: stop_error(accumulated_msgs)