changeset 117:9c998fd06628 draft

Uploaded
author greg
date Tue, 29 May 2018 14:18:14 -0400
parents 88b409324611
children e0bd1edbd8e4
files insect_phenology_model.R
diffstat 1 files changed, 88 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/insect_phenology_model.R	Tue May 29 13:28:32 2018 -0400
+++ b/insect_phenology_model.R	Tue May 29 14:18:14 2018 -0400
@@ -6,6 +6,7 @@
     make_option(c("--adult_mortality"), action="store", dest="adult_mortality", type="integer", help="Adjustment rate for adult mortality"),
     make_option(c("--adult_accumulation"), action="store", dest="adult_accumulation", type="integer", help="Adjustment of degree-days accumulation (old nymph->adult)"),
     make_option(c("--egg_mortality"), action="store", dest="egg_mortality", type="integer", help="Adjustment rate for egg mortality"),
+    make_option(c("--end_date"), action="store", dest="end_date", default=NULL, help="End date for custom date interval"),
     make_option(c("--input_norm"), action="store", dest="input_norm", help="30 year normals temperature data for selected station"),
     make_option(c("--input_ytd"), action="store", dest="input_ytd", default=NULL, help="Year-to-date temperature data for selected location"),
     make_option(c("--insect"), action="store", dest="insect", help="Insect name"),
@@ -24,6 +25,7 @@
     make_option(c("--plot_generations_separately"), action="store", dest="plot_generations_separately", help="Plot Plot P, F1 and F2 as separate lines or pool across them"),
     make_option(c("--plot_std_error"), action="store", dest="plot_std_error", help="Plot Standard error"),
     make_option(c("--replications"), action="store", dest="replications", type="integer", help="Number of replications"),
+    make_option(c("--start_date"), action="store", dest="start_date", default=NULL, help="Start date for custom date interval"),
     make_option(c("--young_nymph_accumulation"), action="store", dest="young_nymph_accumulation", type="integer", help="Adjustment of degree-days accumulation (egg->young nymph)")
 )
 
@@ -143,7 +145,7 @@
     return(list(latitude, longitude, date_str, doy, tmin, tmax));
 }
 
-get_temperature_at_hour = function(latitude, temperature_data_frame, row, num_days) {
+get_temperature_at_hour = function(latitude, temperature_data_frame, row) {
     # Base development threshold for Brown Marmorated Stink Bug
     # insect phenology model.
     threshold = 14.17;
@@ -353,7 +355,7 @@
     return(mortality.probability);
 }
 
-parse_input_data = function(input_ytd, input_norm, num_days_ytd, location) {
+parse_input_data = function(input_ytd, input_norm, num_days, location, start_date, end_date) {
     if (is.null(input_ytd)) {
         # We're analysing only the 30 year normals data, so create an empty
         # data frame for containing temperature data after it is converted
@@ -363,33 +365,51 @@
         # Base all dates on the current date since 30 year
         # normals data does not include any dates.
         year = format(Sys.Date(), "%Y");
-        start_date = paste(year, "01", "01", sep="-");
-        end_date = paste(year, "12", "31", sep="-");
+        if (is.null(start_date) && is.null(end_date)) {
+            start_date = paste(year, "01", "01", sep="-");
+            end_date = paste(year, "12", "31", sep="-");
+        } else {
+            # Extract the month and day from the start date.
+            start_date_str = format(start_date);
+            start_date_str_items = strsplit(start_date_str, "-")[[1]];
+            start_date_month = start_date_str_items[2];
+            start_date_day = start_date_str_items[3];
+            start_date = paste(year, start_date_month, start_date_day, sep="-");
+            # Extract the month and day from the end date.
+            end_date_str = format(start_date);
+            end_date_str_items = strsplit(end_date_str, "-")[[1]];
+            end_date_month = end_date_str_items[2];
+            end_date_day = end_date_str_items[3];
+            end_date = paste(year, start_date_month, start_date_day, sep="-");
+        }
         # Set invalid start and end DOY.
         start_doy_ytd = 0;
         end_doy_ytd = 0;
     } else {
-        # Read the input_ytd temperature datafile into a data frame.
+        # Read the input_ytd temperature data file into a data frame.
         # The input_ytd data has the following 6 columns:
         # LATITUDE, LONGITUDE, DATE, DOY, TMIN, TMAX
         temperature_data_frame = read.csv(file=input_ytd, header=T, strip.white=TRUE, stringsAsFactors=FALSE, sep=",");
         # Set the temperature_data_frame column names for access.
         colnames(temperature_data_frame) = c("LATITUDE", "LONGITUDE", "DATE", "DOY", "TMIN", "TMAX");
-        # Get the start date.
-        start_date = temperature_data_frame$DATE[1];
-        end_date = temperature_data_frame$DATE[num_days_ytd];
+        if (is.null(start_date) && is.null(end_date)) {
+            # Get the start date.
+            start_date = temperature_data_frame$DATE[1];
+            end_date = temperature_data_frame$DATE[num_days];
+        } else {
+            # Extract the custom date interval from temperature_data_frame.
+            temperature_data_frame = temperature_data_frame[date %between% c(start_date, end_date)]
+        }
         # Extract the year from the start date.
         date_str = format(start_date);
         date_str_items = strsplit(date_str, "-")[[1]];
         year = date_str_items[1];
         # Save the first DOY to later check if start_date is Jan 1.
         start_doy_ytd = as.integer(temperature_data_frame$DOY[1]);
-        end_doy_ytd = as.integer(temperature_data_frame$DOY[num_days_ytd]);
+        end_doy_ytd = as.integer(temperature_data_frame$DOY[num_days]);
     }
     # See if we're in a leap year.
     is_leap_year = is_leap_year(start_date);
-    # Get the number of days in the year.
-    total_days = get_total_days(is_leap_year);
     # Read the input_norm temperature datafile into a data frame.
     # The input_norm data has the following 10 columns:
     # STATIONID, LATITUDE, LONGITUDE, ELEV_M, NAME, ST, MMDD, DOY, TMIN, TMAX
@@ -401,6 +421,15 @@
     if (!is_leap_year) {
         norm_data_frame = norm_data_frame[-c(60),];
     }
+    if (is.null(start_date) && is.null(end_date)) {
+        # Get the number of days in the year.
+        total_days = get_total_days(is_leap_year);
+    } else {
+        # Extract the custom date interval from norm_data_frame.
+        norm_data_frame = norm_data_frame[date %between% c(start_date, end_date)]
+        # Use the pre-determined num_days for total_days.
+        total_days = num_days
+    }
     # Set the location to be the station name if the user elected no to enter it.
     if (is.null(location) | length(location)==0) {
         location = norm_data_frame$NAME[1];
@@ -519,19 +548,62 @@
     }
 }
 
+stop_err = function(msg) {
+    cat(msg, file=stderr());
+    quit(save="no", status=1);
+}
+
+validate_date = function(date_str) {
+    valid_date = as.Date(date_str, format="%Y-%m-%d");
+    if( class(valid_date)=="try-error" || is.na(valid_date)) {
+        msg = paste("Invalid date: ", date_str, ", valid date format is yyyy-mm-dd.", sep="");
+        stop_err(msg);
+    }
+    return(valid_date);
+}
+
 # Determine if we're plotting generations separately.
 if (opt$plot_generations_separately=="yes") {
     plot_generations_separately = TRUE;
 } else {
     plot_generations_separately = FALSE;
 }
+# If opt$start_date and opt$end_date have values, then the
+# user chose to plot a custom date interval rather than the
+# entire contents of the input temperature data, so we'll
+# calaculate the number of days in the custom date interval
+# rather than using the number of rows in the input temperature
+# data.
+if (is.null(opt$start_date) && is.null(opt$end_date)) {
+    # Use the default number of rows in the input temperature
+    # data as the number of days.
+    num_days = opt$num_days_ytd;
+} else {
+    # FIXME: currently custom date fields are free text, but
+    # Galaxy should soon include support for a date selector
+    # at which point this tool should be enhanced to use it.
+    # Validate start_date.
+    start_date =  validate_date(opt$start_date);
+    # Validate end_date.
+    end_date = validate_date(opt$end_date);
+    if (start_date >= end_date) {
+        stop_err("The start date must be before the end date for custom date intervals.");
+    }
+    # Calculate the number of days in the custom date interval.
+    num_days = difftime(start_date, end_date, units=c("days"));
+    if (num_days > 50) {
+        # We need to restrict custom date intervals since
+        # plots render tick marks for each day.
+        stop_err("Custom date intervals cannot exceed 50 days.");
+    }
+}
 # Display the total number of days in the Galaxy history item blurb.
-cat("Year-to-date number of days: ", opt$num_days_ytd, "\n");
-
+cat("Year-to-date number of days: ", num_days, "\n");
 # Parse the inputs.
-data_list = parse_input_data(opt$input_ytd, opt$input_norm, opt$num_days_ytd, opt$location);
+data_list = parse_input_data(opt$input_ytd, opt$input_norm, num_days, opt$location, opt$start_date, opt$end_date);
 temperature_data_frame = data_list[[1]];
-# Information needed for plots.
+# Information needed for plots, some of these values are
+# being reset here since in some case they were set above.
 start_date = data_list[[2]];
 end_date = data_list[[3]];
 start_doy_ytd = data_list[[4]];
@@ -778,7 +850,7 @@
         doy = temperature_data_frame$DOY[row];
         # Photoperiod in the day.
         photoperiod = temperature_data_frame$DAYLEN[row];
-        temp.profile = get_temperature_at_hour(latitude, temperature_data_frame, row, total_days);
+        temp.profile = get_temperature_at_hour(latitude, temperature_data_frame, row);
         mean.temp = temp.profile[1];
         averages.temp = temp.profile[2];
         averages.day[row] = averages.temp;