diff validate_affy_metadata.py @ 25:d9f3bcfeecfe draft default tip

Uploaded
author greg
date Thu, 15 Aug 2019 13:15:22 -0400
parents 8a826d1afe69
children
line wrap: on
line diff
--- a/validate_affy_metadata.py	Tue Apr 23 08:57:13 2019 -0400
+++ b/validate_affy_metadata.py	Thu Aug 15 13:15:22 2019 -0400
@@ -37,22 +37,22 @@
         return 'False'
 
 
-def validate_date_string(line_no, date_string, accumulated_msgs):
+def validate_date_string(line_no, date_string, column, accumulated_msgs):
     if len(date_string) == 0:
         return accumulated_msgs
     try:
         datetime.datetime.strptime(date_string, '%Y-%m-%d')
         return accumulated_msgs
     except ValueError:
-        return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be YYYY-MM-DD)." % (line_no, date_string))
+        return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be YYYY-MM-DD) for column %s." % (line_no, date_string, column))
 
 
-def validate_decimal(line_no, decimal_string, accumulated_msgs):
+def validate_decimal(line_no, decimal_string, column, accumulated_msgs):
     try:
         decimal.Decimal(decimal_string)
         return accumulated_msgs
     except Exception:
-        return add_error_msg(accumulated_msgs, "Line %d contains an incorrect decimal value (%s)." % (line_no, decimal_string))
+        return add_error_msg(accumulated_msgs, "Line %d contains an incorrect decimal value (%s) for column %s." % (line_no, decimal_string, column))
 
 
 def validate_email(line_no, email, accumulated_msgs):
@@ -71,19 +71,21 @@
         if i == 0:
             # Skip the header.
             continue
+        # Keep 1-based line value for error messages.
+        line_no = i + 1
         line = line.rstrip("\r\n")
         if i > 97:
             accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 97 lines (must be 1 header line and no more than 96 data lines).")
             stop_error(accumulated_msgs)
         items = line.split("\t")
         if len(items) != 32:
-            accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 32)." % (i, len(items)))
+            accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 32)." % (line_no, len(items)))
             stop_error(accumulated_msgs)
         # Required and validated.
         # Required.
         user_specimen_id = items[0]
         if len(user_specimen_id) == 0:
-            accumulated_msgs = empty_value(i, "user_specimen_id", accumulated_msgs)
+            accumulated_msgs = empty_value(line_no, "user_specimen_id", accumulated_msgs)
         # Optional.
         field_call = items[1]
         # Optional.
@@ -93,82 +95,82 @@
         # Required.
         reef = items[4]
         if len(reef) == 0:
-            accumulated_msgs = empty_value(i, "reef", accumulated_msgs)
+            accumulated_msgs = empty_value(line_no, "reef", accumulated_msgs)
         # Required.
         region = items[5]
         if len(region) == 0:
-            accumulated_msgs = empty_value(i, "region", accumulated_msgs)
+            accumulated_msgs = empty_value(line_no, "region", accumulated_msgs)
         # Required and validated.
         latitude = items[6]
-        accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs)
+        accumulated_msgs = validate_decimal(line_no, latitude, "latitude", accumulated_msgs)
         # Required and validated.
         longitude = items[7]
-        accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs)
+        accumulated_msgs = validate_decimal(line_no, longitude, "longitude", accumulated_msgs)
         # Optional.
         geographic_origin = items[8]
         # Optional.
-        sample_location = items[9]
+        colony_location = items[9]
         # Optional.
-        latitude_outplant = items[10]
+        depth = items[10]
         # Optional.
-        longitude_outplant = items[11]
+        disease_resist = items[11]
         # Optional.
-        depth = items[12]
-        # Optional.
-        disease_resist = items[13]
+        bleach_resist = items[12]
         # Optional.
-        bleach_resist = items[14]
+        mortality = items[13]
         # Optional.
-        mortality = items[15]
+        tle = items[14]
         # Optional.
-        tle = items[16]
-        # Optional.
-        spawning = string_as_boolean_string(items[17])
+        spawning = string_as_boolean_string(items[15])
         # Required.
-        collector_last_name = items[18]
+        collector_last_name = items[16]
         if len(collector_last_name) == 0:
-            accumulated_msgs = empty_value(i, "collector_last_name", accumulated_msgs)
+            accumulated_msgs = empty_value(line_no, "collector_last_name", accumulated_msgs)
         # Required.
-        collector_first_name = items[19]
+        collector_first_name = items[17]
         if len(collector_first_name) == 0:
-            accumulated_msgs = empty_value(i, "collector_first_name", accumulated_msgs)
+            accumulated_msgs = empty_value(line_no, "collector_first_name", accumulated_msgs)
         # Required.
-        org = items[20]
+        org = items[18]
         if len(org) == 0:
-            accumulated_msgs = empty_value(i, "org", accumulated_msgs)
+            accumulated_msgs = empty_value(line_no, "org", accumulated_msgs)
         # Required and validated.
-        collection_date = items[21]
-        accumulated_msgs = validate_date_string(i, collection_date, accumulated_msgs)
+        collection_date = items[19]
+        accumulated_msgs = validate_date_string(line_no, collection_date, "collection_date", accumulated_msgs)
         # Required and validated.
-        contact_email = items[22]
-        accumulated_msgs = validate_email(i, contact_email, accumulated_msgs)
+        contact_email = items[20]
+        accumulated_msgs = validate_email(line_no, contact_email, accumulated_msgs)
         # Required.
-        seq_facility = items[23]
+        seq_facility = items[21]
         if len(seq_facility) == 0:
-            accumulated_msgs = empty_value(i, "seq_facility", accumulated_msgs)
+            accumulated_msgs = empty_value(line_no, "seq_facility", accumulated_msgs)
         # Optional.
-        array_version = items[24]
+        array_version = items[22]
         # Optional.
-        public = string_as_boolean_string(items[25])
+        public = string_as_boolean_string(items[23])
         # Optional.
-        public_after_date = items[26]
-        accumulated_msga = validate_date_string(i, public_after_date, accumulated_msgs)
+        public_after_date = items[24]
+        accumulated_msgs = validate_date_string(line_no, public_after_date, "public_after_date", accumulated_msgs)
         # Required and validated.
-        sperm_motility = items[27]
-        accumulated_msgs = validate_decimal(i, sperm_motility, accumulated_msgs)
+        sperm_motility = items[25]
+        accumulated_msgs = validate_decimal(line_no, sperm_motility, "sperm_motility", accumulated_msgs)
         # Required and validated.
-        healing_time = items[28]
-        accumulated_msgs = validate_decimal(i, healing_time, accumulated_msgs)
+        healing_time = items[26]
+        accumulated_msgs = validate_decimal(line_no, healing_time, "healing_time", accumulated_msgs)
         # Optional.
-        dna_extraction_method = items[29]
+        dna_extraction_method = items[27]
         # Optional.
-        dna_concentration = items[30]
+        dna_concentration = items[28]
         # If dna_concentration has a value, then it must be decimal.
         if len(dna_concentration) > 0:
-            accumulated_msgs = validate_decimal(i, dna_concentration, accumulated_msgs)
+            accumulated_msgs = validate_decimal(line_no, dna_concentration, "dna_concentration", accumulated_msgs)
+        # Optional.
+        registry_id = items[29]
         # Optional.
-        registry_id = items[31]
-       
+        result_folder_name = items[30]
+        # Optional.
+        plate_barcode = items[31]
+
 
 if len(accumulated_msgs) > 0:
     stop_error(accumulated_msgs)