comparison insect_phenology_model.R @ 123:e69e30d853fb draft

Uploaded
author greg
date Thu, 31 May 2018 14:08:46 -0400
parents 8946ddb9d72c
children 534658644efe
comparison
equal deleted inserted replaced
122:8946ddb9d72c 123:e69e30d853fb
267 } else { 267 } else {
268 return(365); 268 return(365);
269 } 269 }
270 } 270 }
271 271
272 get_x_axis_ticks_and_labels = function(temperature_data_frame, prepend_end_doy_norm, append_start_doy_norm, restricted_date_interval) { 272 get_x_axis_ticks_and_labels = function(temperature_data_frame, prepend_end_doy_norm, append_start_doy_norm, date_interval) {
273 # Generate a list of ticks and labels for plotting the 273 # Generate a list of ticks and labels for plotting the
274 # x axis. There are several scenarios that affect this. 274 # x axis. There are several scenarios that affect this.
275 # 1. If restricted_date_interval is TRUE: 275 # 1. If date_interval is TRUE:
276 # a. 276 # a.
277 if (prepend_end_doy_norm > 0) { 277 if (prepend_end_doy_norm > 0) {
278 prepend_end_norm_row = which(temperature_data_frame$DOY==prepend_end_doy_norm); 278 prepend_end_norm_row = which(temperature_data_frame$DOY==prepend_end_doy_norm);
279 } else { 279 } else {
280 prepend_end_norm_row = 0; 280 prepend_end_norm_row = 0;
328 current_month_label = month_label; 328 current_month_label = month_label;
329 last_tick = i; 329 last_tick = i;
330 } 330 }
331 tick_index = get_tick_index(i, last_tick, ticks, month_labels) 331 tick_index = get_tick_index(i, last_tick, ticks, month_labels)
332 if (!is.null(tick_index)) { 332 if (!is.null(tick_index)) {
333 if (restricted_date_interval) { 333 if (date_interval) {
334 # Add a tick for every day. 334 # Add a tick for every day.
335 ticks[tick_index] = i; 335 ticks[tick_index] = i;
336 # Add a blank month label so it is not displayed. 336 # Add a blank month label so it is not displayed.
337 month_labels[tick_index] = ""; 337 month_labels[tick_index] = "";
338 last_tick = i; 338 last_tick = i;
410 processing_year_to_date_data = TRUE; 410 processing_year_to_date_data = TRUE;
411 } 411 }
412 if (is.null(start_date) && is.null(end_date)) { 412 if (is.null(start_date) && is.null(end_date)) {
413 # We're processing the entire year, possibly merging 413 # We're processing the entire year, possibly merging
414 # data from input_norm with data from input_ytd. 414 # data from input_norm with data from input_ytd.
415 restricted_date_interval = FALSE; 415 date_interval = FALSE;
416 } else { 416 } else {
417 restricted_date_interval = TRUE; 417 date_interval = TRUE;
418 # Get the DOY for start_date and end_date. 418 # Get the DOY for start_date and end_date.
419 start_date_doy = strftime(start_date, format="%j"); 419 start_date_doy = strftime(start_date, format="%j");
420 end_date_doy = strftime(end_date, format="%j"); 420 end_date_doy = strftime(end_date, format="%j");
421 } 421 }
422 # Read the input_norm temperature datafile into a data frame. 422 # Read the input_norm temperature datafile into a data frame.
430 # The input_ytd data has the following 6 columns: 430 # The input_ytd data has the following 6 columns:
431 # LATITUDE, LONGITUDE, DATE, DOY, TMIN, TMAX 431 # LATITUDE, LONGITUDE, DATE, DOY, TMIN, TMAX
432 temperature_data_frame = read.csv(file=input_ytd, header=T, strip.white=TRUE, stringsAsFactors=FALSE, sep=","); 432 temperature_data_frame = read.csv(file=input_ytd, header=T, strip.white=TRUE, stringsAsFactors=FALSE, sep=",");
433 # Set the temperature_data_frame column names for access. 433 # Set the temperature_data_frame column names for access.
434 colnames(temperature_data_frame) = c("LATITUDE", "LONGITUDE", "DATE", "DOY", "TMIN", "TMAX"); 434 colnames(temperature_data_frame) = c("LATITUDE", "LONGITUDE", "DATE", "DOY", "TMIN", "TMAX");
435 if (restricted_date_interval) { 435 if (date_interval) {
436 # We're plotting a date interval. 436 # We're plotting a date interval.
437 start_date_ytd_row = which(temperature_data_frame$DATE==start_date); 437 start_date_ytd_row = which(temperature_data_frame$DATE==start_date);
438 if (start_date_ytd_row > 0) { 438 if (start_date_ytd_row > 0) {
439 # The start date is contained within the input_ytd data. 439 # The start date is contained within the input_ytd data.
440 start_doy_ytd = as.integer(temperature_data_frame$DOY[start_date_ytd_row]); 440 start_doy_ytd = as.integer(temperature_data_frame$DOY[start_date_ytd_row]);
446 if (end_date_ytd_row > 0) { 446 if (end_date_ytd_row > 0) {
447 # The end date is contained within the input_ytd data. 447 # The end date is contained within the input_ytd data.
448 end_doy_ytd = as.integer(temperature_data_frame$DOY[end_date_ytd_row]); 448 end_doy_ytd = as.integer(temperature_data_frame$DOY[end_date_ytd_row]);
449 } 449 }
450 date_str = start_date; 450 date_str = start_date;
451 # Extract the year from the start date.
452 date_str_items = strsplit(date_str, "-")[[1]];
453 year = date_str_items[1];
451 } else { 454 } else {
452 # We're plotting an entire year. 455 # We're plotting an entire year.
453 # Get the number of days contained in temperature_data_frame. 456 # Get the number of days contained in temperature_data_frame.
454 num_rows = dim(temperature_data_frame)[1]; 457 num_rows = dim(temperature_data_frame)[1];
455 # Get the start date and end date from temperature_data_frame. 458 # Get the start date and end date from temperature_data_frame.
456 start_date_ytd_row = 1; 459 start_date_ytd_row = 1;
460 # Temporarily set start_date to get the year.
457 start_date = temperature_data_frame$DATE[1]; 461 start_date = temperature_data_frame$DATE[1];
458 end_date_ytd_row = num_rows; 462 end_date_ytd_row = num_rows;
459 end_date = temperature_data_frame$DATE[num_rows]; 463 end_date = temperature_data_frame$DATE[num_rows];
460 date_str = format(start_date); 464 date_str = format(start_date);
465 # Extract the year from the start date.
466 date_str_items = strsplit(date_str, "-")[[1]];
467 # Get the year.
468 year = date_str_items[1];
469 # Properly set the start_date to be Jan 1 of the year.
470 start_date = paste(year, "01", "01", sep="-");
471 # Properly set the end_date to be Dec 31 of the year.
472 end_date = paste(year, "12", "31", sep="-");
461 # Save the first DOY to later check if start_date is Jan 1. 473 # Save the first DOY to later check if start_date is Jan 1.
462 start_doy_ytd = as.integer(temperature_data_frame$DOY[1]); 474 start_doy_ytd = as.integer(temperature_data_frame$DOY[1]);
463 end_doy_ytd = as.integer(temperature_data_frame$DOY[num_rows]); 475 end_doy_ytd = as.integer(temperature_data_frame$DOY[num_rows]);
464 } 476 }
465 # Extract the year from the start date.
466 date_str_items = strsplit(date_str, "-")[[1]];
467 year = date_str_items[1];
468 } else { 477 } else {
469 # We're processing only the 30 year normals data, so create an empty 478 # We're processing only the 30 year normals data, so create an empty
470 # data frame for containing temperature data after it is converted 479 # data frame for containing temperature data after it is converted
471 # from the 30 year normals format to the year-to-date format. 480 # from the 30 year normals format to the year-to-date format.
472 temperature_data_frame = data.frame(matrix(ncol=6, nrow=0)); 481 temperature_data_frame = data.frame(matrix(ncol=6, nrow=0));
473 colnames(temperature_data_frame) = c("LATITUDE", "LONGITUDE", "DATE", "DOY", "TMIN", "TMAX"); 482 colnames(temperature_data_frame) = c("LATITUDE", "LONGITUDE", "DATE", "DOY", "TMIN", "TMAX");
474 if (restricted_date_interval) { 483 if (date_interval) {
475 # We're plotting a date interval. 484 # We're plotting a date interval.
476 # Extract the year, month and day from the start date. 485 # Extract the year, month and day from the start date.
477 start_date_str = format(start_date); 486 start_date_str = format(start_date);
478 start_date_str_items = strsplit(start_date_str, "-")[[1]]; 487 start_date_str_items = strsplit(start_date_str, "-")[[1]];
479 year = start_date_str_items[1]; 488 year = start_date_str_items[1];
483 # Extract the month and day from the end date. 492 # Extract the month and day from the end date.
484 end_date_str = format(start_date); 493 end_date_str = format(start_date);
485 end_date_str_items = strsplit(end_date_str, "-")[[1]]; 494 end_date_str_items = strsplit(end_date_str, "-")[[1]];
486 end_date_month = end_date_str_items[2]; 495 end_date_month = end_date_str_items[2];
487 end_date_day = end_date_str_items[3]; 496 end_date_day = end_date_str_items[3];
488 end_date = paste(year, start_date_month, start_date_day, sep="-"); 497 end_date = paste(year, end_date_month, end_date_day, sep="-");
489 } else { 498 } else {
490 # We're plotting an entire year. 499 # We're plotting an entire year.
491 # Base all dates on the current date since 30 year 500 # Base all dates on the current date since 30 year
492 # normals data does not include any dates. 501 # normals data does not include any dates.
493 year = format(Sys.Date(), "%Y"); 502 year = format(Sys.Date(), "%Y");
506 if (is.null(location) | length(location) == 0) { 515 if (is.null(location) | length(location) == 0) {
507 location = norm_data_frame$NAME[1]; 516 location = norm_data_frame$NAME[1];
508 } 517 }
509 if (processing_year_to_date_data) { 518 if (processing_year_to_date_data) {
510 # Merge the year-to-date data with the 30 year normals data. 519 # Merge the year-to-date data with the 30 year normals data.
511 if (restricted_date_interval) { 520 if (date_interval) {
512 # The values of start_date_ytd_row and end_date_ytd_row were set above. 521 # The values of start_date_ytd_row and end_date_ytd_row were set above.
513 if (start_date_ytd_row > 0 & end_date_ytd_row > 0) { 522 if (start_date_ytd_row > 0 & end_date_ytd_row > 0) {
514 # The date interval is contained within the input_ytd 523 # The date interval is contained within the input_ytd
515 # data, so we don't need to merge the 30 year normals data. 524 # data, so we don't need to merge the 30 year normals data.
516 temperature_data_frame = temperature_data_frame[start_date_ytd_row:end_date_ytd_row,]; 525 temperature_data_frame = temperature_data_frame[start_date_ytd_row:end_date_ytd_row,];
602 total_days = get_total_days(is_leap_year); 611 total_days = get_total_days(is_leap_year);
603 if (end_doy_ytd < total_days) { 612 if (end_doy_ytd < total_days) {
604 # Define the next row for the year-to-date data from the 30 year normals data. 613 # Define the next row for the year-to-date data from the 30 year normals data.
605 append_start_doy_norm = end_doy_ytd + 1; 614 append_start_doy_norm = end_doy_ytd + 1;
606 first_norm_row = which(norm_data_frame$DOY == append_start_doy_norm); 615 first_norm_row = which(norm_data_frame$DOY == append_start_doy_norm);
607 last_norm_row = which(norm_data_frame$DOY == total_days);
608 # Append the 30 year normals data to the year-to-date data. 616 # Append the 30 year normals data to the year-to-date data.
609 for (i in first_norm_row:last_norm_row) { 617 for (i in first_norm_row:total_days) {
610 temperature_data_frame[i,] = get_next_normals_row(norm_data_frame, year, is_leap_year, i); 618 temperature_data_frame[i,] = get_next_normals_row(norm_data_frame, year, is_leap_year, i);
611 } 619 }
612 } 620 }
613 } 621 }
614 } else { 622 } else {
615 # We're processing only the 30 year normals data. 623 # We're processing only the 30 year normals data.
616 if (restricted_date_interval) { 624 if (date_interval) {
617 # Populate temperature_data_frame from norm_data_frame. 625 # Populate temperature_data_frame from norm_data_frame.
618 temperature_data_frame = from_30_year_normals(temperature_data_frame, norm_data_frame, start_date_doy, end_date_doy); 626 temperature_data_frame = from_30_year_normals(temperature_data_frame, norm_data_frame, start_date_doy, end_date_doy);
619 } else { 627 } else {
620 total_days = get_total_days(is_leap_year); 628 total_days = get_total_days(is_leap_year);
621 for (i in 1:total_days) { 629 for (i in 1:total_days) {
724 stop_err(msg); 732 stop_err(msg);
725 } 733 }
726 return(valid_date); 734 return(valid_date);
727 } 735 }
728 736
737 if (is.null(opt$input_ytd)) {
738 processing_year_to_date_data = FALSE;
739 } else {
740 processing_year_to_date_data = TRUE;
741 }
742 # Determine if we're plotting generations separately.
743 if (opt$plot_generations_separately=="yes") {
744 plot_generations_separately = TRUE;
745 } else {
746 plot_generations_separately = FALSE;
747 }
748 if (is.null(opt$start_date) && is.null(opt$end_date)) {
749 # We're plotting an entire year.
750 date_interval = FALSE;
751 # Display the total number of days in the Galaxy history item blurb.
752 if (processing_year_to_date_data) {
753 cat("Number of days year-to-date: ", opt$num_days_ytd, "\n");
754 } else {
755 if (is_leap_year) {
756 num_days = 366;
757 } else {
758 num_days = 365;
759 }
760 cat("Number of days in year: ", num_days, "\n");
761 }
762 } else {
763 # FIXME: currently custom date fields are free text, but
764 # Galaxy should soon include support for a date selector
765 # at which point this tool should be enhanced to use it.
766 # Validate start_date.
767 date_interval = TRUE;
768 # Calaculate the number of days in the date interval rather
769 # than using the number of rows in the input temperature data.
770 start_date = validate_date(opt$start_date);
771 # Validate end_date.
772 end_date = validate_date(opt$end_date);
773 if (start_date >= end_date) {
774 stop_err("The start date must be between 1 and 50 days before the end date when setting date intervals for plots.");
775 }
776 # Calculate the number of days in the date interval.
777 num_days = difftime(start_date, end_date, units=c("days"));
778 if (num_days > 50) {
779 # We need to restrict date intervals since
780 # plots render tick marks for each day.
781 stop_err("Date intervals for plotting cannot exceed 50 days.");
782 }
783 # Display the total number of days in the Galaxy history item blurb.
784 cat("Number of days in date interval: ", num_days, "\n");
785 }
729 # Parse the inputs. 786 # Parse the inputs.
730 data_list = parse_input_data(opt$input_ytd, opt$input_norm, opt$location, opt$start_date, opt$end_date); 787 data_list = parse_input_data(opt$input_ytd, opt$input_norm, opt$location, opt$start_date, opt$end_date);
731 temperature_data_frame = data_list[[1]]; 788 temperature_data_frame = data_list[[1]];
732 # Information needed for plots, some of these values are 789 # Information needed for plots, some of these values are
733 # being reset here since in some case they were set above. 790 # being reset here since in some case they were set above.
736 prepend_end_doy_norm = data_list[[4]]; 793 prepend_end_doy_norm = data_list[[4]];
737 append_start_doy_norm = data_list[[5]]; 794 append_start_doy_norm = data_list[[5]];
738 is_leap_year = data_list[[6]]; 795 is_leap_year = data_list[[6]];
739 location = data_list[[7]]; 796 location = data_list[[7]];
740 797
741 if (is.null(input_ytd)) {
742 processing_year_to_date_data = FALSE;
743 } else {
744 processing_year_to_date_data = TRUE;
745 }
746 # Determine if we're plotting generations separately.
747 if (opt$plot_generations_separately=="yes") {
748 plot_generations_separately = TRUE;
749 } else {
750 plot_generations_separately = FALSE;
751 }
752 if (is.null(opt$start_date) && is.null(opt$end_date)) {
753 # We're plotting an entire year.
754 restricted_date_interval = FALSE;
755 # Display the total number of days in the Galaxy history item blurb.
756 if (processing_year_to_date_data) {
757 cat("Number of days year-to-date: ", opt$num_days_ytd, "\n");
758 } else {
759 if (is_leap_year) {
760 num_days = 366;
761 } else {
762 num_days = 365;
763 }
764 cat("Number of days in year: ", num_days, "\n");
765 }
766 } else {
767 # FIXME: currently custom date fields are free text, but
768 # Galaxy should soon include support for a date selector
769 # at which point this tool should be enhanced to use it.
770 # Validate start_date.
771 restricted_date_interval = TRUE;
772 # Calaculate the number of days in the date interval rather
773 # than using the number of rows in the input temperature data.
774 start_date = validate_date(opt$start_date);
775 # Validate end_date.
776 end_date = validate_date(opt$end_date);
777 if (start_date >= end_date) {
778 stop_err("The start date must be between 1 and 50 days before the end date when setting date intervals for plots.");
779 }
780 # Calculate the number of days in the date interval.
781 num_days = difftime(start_date, end_date, units=c("days"));
782 if (num_days > 50) {
783 # We need to restrict date intervals since
784 # plots render tick marks for each day.
785 stop_err("Date intervals for plotting cannot exceed 50 days.");
786 }
787 # Display the total number of days in the Galaxy history item blurb.
788 cat("Number of days in date interval: ", num_days, "\n");
789 }
790
791 # Create copies of the temperature data for generations P, F1 and F2 if we're plotting generations separately. 798 # Create copies of the temperature data for generations P, F1 and F2 if we're plotting generations separately.
792 if (plot_generations_separately) { 799 if (plot_generations_separately) {
793 temperature_data_frame_P = data.frame(temperature_data_frame); 800 temperature_data_frame_P = data.frame(temperature_data_frame);
794 temperature_data_frame_F1 = data.frame(temperature_data_frame); 801 temperature_data_frame_F1 = data.frame(temperature_data_frame);
795 temperature_data_frame_F2 = data.frame(temperature_data_frame); 802 temperature_data_frame_F2 = data.frame(temperature_data_frame);
796 } 803 }
797 804
798 # Get the ticks date labels for plots. 805 # Get the ticks date labels for plots.
799 ticks_and_labels = get_x_axis_ticks_and_labels(temperature_data_frame, prepend_end_doy_norm, append_start_doy_norm, restricted_date_interval); 806 ticks_and_labels = get_x_axis_ticks_and_labels(temperature_data_frame, prepend_end_doy_norm, append_start_doy_norm, date_interval);
800 ticks = c(unlist(ticks_and_labels[1])); 807 ticks = c(unlist(ticks_and_labels[1]));
801 date_labels = c(unlist(ticks_and_labels[2])); 808 date_labels = c(unlist(ticks_and_labels[2]));
802 # All latitude values are the same, so get the value for plots from the first row. 809 # All latitude values are the same, so get the value for plots from the first row.
803 latitude = temperature_data_frame$LATITUDE[1]; 810 latitude = temperature_data_frame$LATITUDE[1];
804 811