Mercurial > repos > greg > validate_affy_metadata
changeset 9:d15ae203013e draft
Uploaded
author | greg |
---|---|
date | Thu, 15 Nov 2018 10:20:49 -0500 |
parents | 0735cfcc7490 |
children | bdfd9b8d32f1 |
files | validate_affy_metadata.py |
diffstat | 1 files changed, 50 insertions(+), 44 deletions(-) [+] |
line wrap: on
line diff
--- a/validate_affy_metadata.py Wed Nov 14 15:01:41 2018 -0500 +++ b/validate_affy_metadata.py Thu Nov 15 10:20:49 2018 -0500 @@ -30,12 +30,21 @@ sys.exit(msg) +def string_as_boolean_string(string): + if str(string).lower() in ['true', 'yes', 'on', '1']: + return 'True' + else: + return 'False' + + def validate_date_string(line_no, date_string, accumulated_msgs): + if len(date_string) == 0: + return accumulated_msgs try: - datetime.datetime.strptime(date_string, '%y/%m/%d') + datetime.datetime.strptime(date_string, '%Y-%m-%d') return accumulated_msgs except ValueError: - return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be YY/MM/DD)." % (line_no, date_string)) + return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be YYYY-MM-DD)." % (line_no, date_string)) def validate_decimal(line_no, decimal_string, accumulated_msgs): @@ -64,93 +73,90 @@ continue line = line.rstrip("\r\n") if i > 97: - accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 96 data lines.") + accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 97 lines (must be 1 header line and no more than 96 data lines).") stop_error(accumulated_msgs) items = line.split(",") - if len(items) != 31: - accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 31)." % (i, len(items))) + if len(items) != 29: + accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 29)." % (i, len(items))) stop_error(accumulated_msgs) - # Required. - sample_id = items[0] - if len(sample_id) == 0: - accumulated_msgs = empty_value(i, "sample_id", accumulated_msgs) # Required and validated. - date_entered_db = items[1] + date_entered_db = items[0] accumulated_msgs = validate_date_string(i, date_entered_db, accumulated_msgs) # Required. - user_specimen_id = items[2] + user_specimen_id = items[1] if len(user_specimen_id) == 0: accumulated_msgs = empty_value(i, "user_specimen_id", accumulated_msgs) # Optional. - duplicate_sample = items[3] + field_call = items[2] # Optional. - matching_samples = items[4] - # Optional. - field_call = items[5] + bcoral_genet_id = items[3] # Optional. - bcoral_genet_id = items[6] - # Optional. - bsym_genet_id = items[7] + bsym_genet_id = items[4] # Required. - reef = items[8] + reef = items[5] if len(reef) == 0: accumulated_msgs = empty_value(i, "reef", accumulated_msgs) # Required. - region = items[9] + region = items[6] if len(region) == 0: accumulated_msgs = empty_value(i, "region", accumulated_msgs) # Required and validated. - latitude = items[10] + latitude = items[7] accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs) # Required and validated. - longitude = items[11] + longitude = items[8] accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs) # Optional. - geographic_origin = items[12] + geographic_origin = items[9] + # Optional. + sample_location = items[10] # Optional. - sample_location = items[13] + latitude_outplant = items[11] # Optional. - latitude_outplant = items[14] + longitude_outplant = items[12] # Optional. - longitude_outplant = items[15] + depth = items[13] # Optional. - depth = items[16] + dist_shore = items[14] # Optional. - dist_shore = items[17] + disease_resist = items[15] # Optional. - disease_resist = items[18] + bleach_resist = items[16] # Optional. - bleach_resist = items[19] + mortality = items[17] # Optional. - mortality = items[20] - # Optional. - tle = items[21] + tle = items[18] # Optional. - spawning = items[22] + spawning = string_as_boolean_string(items[19]) + # Required. + collector_last_name = items[21] + if len(collector_last_name) == 0: + accumulated_msgs = empty_value(i, "collector_last_name", accumulated_msgs) # Required. - collector = items[23] - if len(collector) == 0: - accumulated_msgs = empty_value(i, "collector", accumulated_msgs) + collector_first_name = items[22] + if len(collector_first_name) == 0: + accumulated_msgs = empty_value(i, "collector_first_name", accumulated_msgs) # Required. - org = items[24] + org = items[23] if len(org) == 0: accumulated_msgs = empty_value(i, "org", accumulated_msgs) # Required and validated. - collection_date = items[25] + collection_date = items[24] accumulated_msgs = validate_date_string(i, date_entered_db, accumulated_msgs) # Required and validated. - contact_email = items[26] + contact_email = items[25] accumulated_msgs = validate_email(i, contact_email, accumulated_msgs) # Required. - seq_facility = items[27] + seq_facility = items[26] if len(seq_facility) == 0: accumulated_msgs = empty_value(i, "seq_facility", accumulated_msgs) # Optional. - array_version = items[28] + array_version = items[27] # Optional. - data_sharing = items[29] + public = string_as_boolean_string(items[28]) # Optional. - data_hold = items[30] + public_after_date = items[30] + accumulated_msga = validate_date_string(public_after_date) if len(accumulated_msgs) > 0: stop_error(accumulated_msgs)