Mercurial > repos > greg > insect_phenology_model
comparison insect_phenology_model.R @ 123:e69e30d853fb draft
Uploaded
author | greg |
---|---|
date | Thu, 31 May 2018 14:08:46 -0400 |
parents | 8946ddb9d72c |
children | 534658644efe |
comparison
equal
deleted
inserted
replaced
122:8946ddb9d72c | 123:e69e30d853fb |
---|---|
267 } else { | 267 } else { |
268 return(365); | 268 return(365); |
269 } | 269 } |
270 } | 270 } |
271 | 271 |
272 get_x_axis_ticks_and_labels = function(temperature_data_frame, prepend_end_doy_norm, append_start_doy_norm, restricted_date_interval) { | 272 get_x_axis_ticks_and_labels = function(temperature_data_frame, prepend_end_doy_norm, append_start_doy_norm, date_interval) { |
273 # Generate a list of ticks and labels for plotting the | 273 # Generate a list of ticks and labels for plotting the |
274 # x axis. There are several scenarios that affect this. | 274 # x axis. There are several scenarios that affect this. |
275 # 1. If restricted_date_interval is TRUE: | 275 # 1. If date_interval is TRUE: |
276 # a. | 276 # a. |
277 if (prepend_end_doy_norm > 0) { | 277 if (prepend_end_doy_norm > 0) { |
278 prepend_end_norm_row = which(temperature_data_frame$DOY==prepend_end_doy_norm); | 278 prepend_end_norm_row = which(temperature_data_frame$DOY==prepend_end_doy_norm); |
279 } else { | 279 } else { |
280 prepend_end_norm_row = 0; | 280 prepend_end_norm_row = 0; |
328 current_month_label = month_label; | 328 current_month_label = month_label; |
329 last_tick = i; | 329 last_tick = i; |
330 } | 330 } |
331 tick_index = get_tick_index(i, last_tick, ticks, month_labels) | 331 tick_index = get_tick_index(i, last_tick, ticks, month_labels) |
332 if (!is.null(tick_index)) { | 332 if (!is.null(tick_index)) { |
333 if (restricted_date_interval) { | 333 if (date_interval) { |
334 # Add a tick for every day. | 334 # Add a tick for every day. |
335 ticks[tick_index] = i; | 335 ticks[tick_index] = i; |
336 # Add a blank month label so it is not displayed. | 336 # Add a blank month label so it is not displayed. |
337 month_labels[tick_index] = ""; | 337 month_labels[tick_index] = ""; |
338 last_tick = i; | 338 last_tick = i; |
410 processing_year_to_date_data = TRUE; | 410 processing_year_to_date_data = TRUE; |
411 } | 411 } |
412 if (is.null(start_date) && is.null(end_date)) { | 412 if (is.null(start_date) && is.null(end_date)) { |
413 # We're processing the entire year, possibly merging | 413 # We're processing the entire year, possibly merging |
414 # data from input_norm with data from input_ytd. | 414 # data from input_norm with data from input_ytd. |
415 restricted_date_interval = FALSE; | 415 date_interval = FALSE; |
416 } else { | 416 } else { |
417 restricted_date_interval = TRUE; | 417 date_interval = TRUE; |
418 # Get the DOY for start_date and end_date. | 418 # Get the DOY for start_date and end_date. |
419 start_date_doy = strftime(start_date, format="%j"); | 419 start_date_doy = strftime(start_date, format="%j"); |
420 end_date_doy = strftime(end_date, format="%j"); | 420 end_date_doy = strftime(end_date, format="%j"); |
421 } | 421 } |
422 # Read the input_norm temperature datafile into a data frame. | 422 # Read the input_norm temperature datafile into a data frame. |
430 # The input_ytd data has the following 6 columns: | 430 # The input_ytd data has the following 6 columns: |
431 # LATITUDE, LONGITUDE, DATE, DOY, TMIN, TMAX | 431 # LATITUDE, LONGITUDE, DATE, DOY, TMIN, TMAX |
432 temperature_data_frame = read.csv(file=input_ytd, header=T, strip.white=TRUE, stringsAsFactors=FALSE, sep=","); | 432 temperature_data_frame = read.csv(file=input_ytd, header=T, strip.white=TRUE, stringsAsFactors=FALSE, sep=","); |
433 # Set the temperature_data_frame column names for access. | 433 # Set the temperature_data_frame column names for access. |
434 colnames(temperature_data_frame) = c("LATITUDE", "LONGITUDE", "DATE", "DOY", "TMIN", "TMAX"); | 434 colnames(temperature_data_frame) = c("LATITUDE", "LONGITUDE", "DATE", "DOY", "TMIN", "TMAX"); |
435 if (restricted_date_interval) { | 435 if (date_interval) { |
436 # We're plotting a date interval. | 436 # We're plotting a date interval. |
437 start_date_ytd_row = which(temperature_data_frame$DATE==start_date); | 437 start_date_ytd_row = which(temperature_data_frame$DATE==start_date); |
438 if (start_date_ytd_row > 0) { | 438 if (start_date_ytd_row > 0) { |
439 # The start date is contained within the input_ytd data. | 439 # The start date is contained within the input_ytd data. |
440 start_doy_ytd = as.integer(temperature_data_frame$DOY[start_date_ytd_row]); | 440 start_doy_ytd = as.integer(temperature_data_frame$DOY[start_date_ytd_row]); |
446 if (end_date_ytd_row > 0) { | 446 if (end_date_ytd_row > 0) { |
447 # The end date is contained within the input_ytd data. | 447 # The end date is contained within the input_ytd data. |
448 end_doy_ytd = as.integer(temperature_data_frame$DOY[end_date_ytd_row]); | 448 end_doy_ytd = as.integer(temperature_data_frame$DOY[end_date_ytd_row]); |
449 } | 449 } |
450 date_str = start_date; | 450 date_str = start_date; |
451 # Extract the year from the start date. | |
452 date_str_items = strsplit(date_str, "-")[[1]]; | |
453 year = date_str_items[1]; | |
451 } else { | 454 } else { |
452 # We're plotting an entire year. | 455 # We're plotting an entire year. |
453 # Get the number of days contained in temperature_data_frame. | 456 # Get the number of days contained in temperature_data_frame. |
454 num_rows = dim(temperature_data_frame)[1]; | 457 num_rows = dim(temperature_data_frame)[1]; |
455 # Get the start date and end date from temperature_data_frame. | 458 # Get the start date and end date from temperature_data_frame. |
456 start_date_ytd_row = 1; | 459 start_date_ytd_row = 1; |
460 # Temporarily set start_date to get the year. | |
457 start_date = temperature_data_frame$DATE[1]; | 461 start_date = temperature_data_frame$DATE[1]; |
458 end_date_ytd_row = num_rows; | 462 end_date_ytd_row = num_rows; |
459 end_date = temperature_data_frame$DATE[num_rows]; | 463 end_date = temperature_data_frame$DATE[num_rows]; |
460 date_str = format(start_date); | 464 date_str = format(start_date); |
465 # Extract the year from the start date. | |
466 date_str_items = strsplit(date_str, "-")[[1]]; | |
467 # Get the year. | |
468 year = date_str_items[1]; | |
469 # Properly set the start_date to be Jan 1 of the year. | |
470 start_date = paste(year, "01", "01", sep="-"); | |
471 # Properly set the end_date to be Dec 31 of the year. | |
472 end_date = paste(year, "12", "31", sep="-"); | |
461 # Save the first DOY to later check if start_date is Jan 1. | 473 # Save the first DOY to later check if start_date is Jan 1. |
462 start_doy_ytd = as.integer(temperature_data_frame$DOY[1]); | 474 start_doy_ytd = as.integer(temperature_data_frame$DOY[1]); |
463 end_doy_ytd = as.integer(temperature_data_frame$DOY[num_rows]); | 475 end_doy_ytd = as.integer(temperature_data_frame$DOY[num_rows]); |
464 } | 476 } |
465 # Extract the year from the start date. | |
466 date_str_items = strsplit(date_str, "-")[[1]]; | |
467 year = date_str_items[1]; | |
468 } else { | 477 } else { |
469 # We're processing only the 30 year normals data, so create an empty | 478 # We're processing only the 30 year normals data, so create an empty |
470 # data frame for containing temperature data after it is converted | 479 # data frame for containing temperature data after it is converted |
471 # from the 30 year normals format to the year-to-date format. | 480 # from the 30 year normals format to the year-to-date format. |
472 temperature_data_frame = data.frame(matrix(ncol=6, nrow=0)); | 481 temperature_data_frame = data.frame(matrix(ncol=6, nrow=0)); |
473 colnames(temperature_data_frame) = c("LATITUDE", "LONGITUDE", "DATE", "DOY", "TMIN", "TMAX"); | 482 colnames(temperature_data_frame) = c("LATITUDE", "LONGITUDE", "DATE", "DOY", "TMIN", "TMAX"); |
474 if (restricted_date_interval) { | 483 if (date_interval) { |
475 # We're plotting a date interval. | 484 # We're plotting a date interval. |
476 # Extract the year, month and day from the start date. | 485 # Extract the year, month and day from the start date. |
477 start_date_str = format(start_date); | 486 start_date_str = format(start_date); |
478 start_date_str_items = strsplit(start_date_str, "-")[[1]]; | 487 start_date_str_items = strsplit(start_date_str, "-")[[1]]; |
479 year = start_date_str_items[1]; | 488 year = start_date_str_items[1]; |
483 # Extract the month and day from the end date. | 492 # Extract the month and day from the end date. |
484 end_date_str = format(start_date); | 493 end_date_str = format(start_date); |
485 end_date_str_items = strsplit(end_date_str, "-")[[1]]; | 494 end_date_str_items = strsplit(end_date_str, "-")[[1]]; |
486 end_date_month = end_date_str_items[2]; | 495 end_date_month = end_date_str_items[2]; |
487 end_date_day = end_date_str_items[3]; | 496 end_date_day = end_date_str_items[3]; |
488 end_date = paste(year, start_date_month, start_date_day, sep="-"); | 497 end_date = paste(year, end_date_month, end_date_day, sep="-"); |
489 } else { | 498 } else { |
490 # We're plotting an entire year. | 499 # We're plotting an entire year. |
491 # Base all dates on the current date since 30 year | 500 # Base all dates on the current date since 30 year |
492 # normals data does not include any dates. | 501 # normals data does not include any dates. |
493 year = format(Sys.Date(), "%Y"); | 502 year = format(Sys.Date(), "%Y"); |
506 if (is.null(location) | length(location) == 0) { | 515 if (is.null(location) | length(location) == 0) { |
507 location = norm_data_frame$NAME[1]; | 516 location = norm_data_frame$NAME[1]; |
508 } | 517 } |
509 if (processing_year_to_date_data) { | 518 if (processing_year_to_date_data) { |
510 # Merge the year-to-date data with the 30 year normals data. | 519 # Merge the year-to-date data with the 30 year normals data. |
511 if (restricted_date_interval) { | 520 if (date_interval) { |
512 # The values of start_date_ytd_row and end_date_ytd_row were set above. | 521 # The values of start_date_ytd_row and end_date_ytd_row were set above. |
513 if (start_date_ytd_row > 0 & end_date_ytd_row > 0) { | 522 if (start_date_ytd_row > 0 & end_date_ytd_row > 0) { |
514 # The date interval is contained within the input_ytd | 523 # The date interval is contained within the input_ytd |
515 # data, so we don't need to merge the 30 year normals data. | 524 # data, so we don't need to merge the 30 year normals data. |
516 temperature_data_frame = temperature_data_frame[start_date_ytd_row:end_date_ytd_row,]; | 525 temperature_data_frame = temperature_data_frame[start_date_ytd_row:end_date_ytd_row,]; |
602 total_days = get_total_days(is_leap_year); | 611 total_days = get_total_days(is_leap_year); |
603 if (end_doy_ytd < total_days) { | 612 if (end_doy_ytd < total_days) { |
604 # Define the next row for the year-to-date data from the 30 year normals data. | 613 # Define the next row for the year-to-date data from the 30 year normals data. |
605 append_start_doy_norm = end_doy_ytd + 1; | 614 append_start_doy_norm = end_doy_ytd + 1; |
606 first_norm_row = which(norm_data_frame$DOY == append_start_doy_norm); | 615 first_norm_row = which(norm_data_frame$DOY == append_start_doy_norm); |
607 last_norm_row = which(norm_data_frame$DOY == total_days); | |
608 # Append the 30 year normals data to the year-to-date data. | 616 # Append the 30 year normals data to the year-to-date data. |
609 for (i in first_norm_row:last_norm_row) { | 617 for (i in first_norm_row:total_days) { |
610 temperature_data_frame[i,] = get_next_normals_row(norm_data_frame, year, is_leap_year, i); | 618 temperature_data_frame[i,] = get_next_normals_row(norm_data_frame, year, is_leap_year, i); |
611 } | 619 } |
612 } | 620 } |
613 } | 621 } |
614 } else { | 622 } else { |
615 # We're processing only the 30 year normals data. | 623 # We're processing only the 30 year normals data. |
616 if (restricted_date_interval) { | 624 if (date_interval) { |
617 # Populate temperature_data_frame from norm_data_frame. | 625 # Populate temperature_data_frame from norm_data_frame. |
618 temperature_data_frame = from_30_year_normals(temperature_data_frame, norm_data_frame, start_date_doy, end_date_doy); | 626 temperature_data_frame = from_30_year_normals(temperature_data_frame, norm_data_frame, start_date_doy, end_date_doy); |
619 } else { | 627 } else { |
620 total_days = get_total_days(is_leap_year); | 628 total_days = get_total_days(is_leap_year); |
621 for (i in 1:total_days) { | 629 for (i in 1:total_days) { |
724 stop_err(msg); | 732 stop_err(msg); |
725 } | 733 } |
726 return(valid_date); | 734 return(valid_date); |
727 } | 735 } |
728 | 736 |
737 if (is.null(opt$input_ytd)) { | |
738 processing_year_to_date_data = FALSE; | |
739 } else { | |
740 processing_year_to_date_data = TRUE; | |
741 } | |
742 # Determine if we're plotting generations separately. | |
743 if (opt$plot_generations_separately=="yes") { | |
744 plot_generations_separately = TRUE; | |
745 } else { | |
746 plot_generations_separately = FALSE; | |
747 } | |
748 if (is.null(opt$start_date) && is.null(opt$end_date)) { | |
749 # We're plotting an entire year. | |
750 date_interval = FALSE; | |
751 # Display the total number of days in the Galaxy history item blurb. | |
752 if (processing_year_to_date_data) { | |
753 cat("Number of days year-to-date: ", opt$num_days_ytd, "\n"); | |
754 } else { | |
755 if (is_leap_year) { | |
756 num_days = 366; | |
757 } else { | |
758 num_days = 365; | |
759 } | |
760 cat("Number of days in year: ", num_days, "\n"); | |
761 } | |
762 } else { | |
763 # FIXME: currently custom date fields are free text, but | |
764 # Galaxy should soon include support for a date selector | |
765 # at which point this tool should be enhanced to use it. | |
766 # Validate start_date. | |
767 date_interval = TRUE; | |
768 # Calaculate the number of days in the date interval rather | |
769 # than using the number of rows in the input temperature data. | |
770 start_date = validate_date(opt$start_date); | |
771 # Validate end_date. | |
772 end_date = validate_date(opt$end_date); | |
773 if (start_date >= end_date) { | |
774 stop_err("The start date must be between 1 and 50 days before the end date when setting date intervals for plots."); | |
775 } | |
776 # Calculate the number of days in the date interval. | |
777 num_days = difftime(start_date, end_date, units=c("days")); | |
778 if (num_days > 50) { | |
779 # We need to restrict date intervals since | |
780 # plots render tick marks for each day. | |
781 stop_err("Date intervals for plotting cannot exceed 50 days."); | |
782 } | |
783 # Display the total number of days in the Galaxy history item blurb. | |
784 cat("Number of days in date interval: ", num_days, "\n"); | |
785 } | |
729 # Parse the inputs. | 786 # Parse the inputs. |
730 data_list = parse_input_data(opt$input_ytd, opt$input_norm, opt$location, opt$start_date, opt$end_date); | 787 data_list = parse_input_data(opt$input_ytd, opt$input_norm, opt$location, opt$start_date, opt$end_date); |
731 temperature_data_frame = data_list[[1]]; | 788 temperature_data_frame = data_list[[1]]; |
732 # Information needed for plots, some of these values are | 789 # Information needed for plots, some of these values are |
733 # being reset here since in some case they were set above. | 790 # being reset here since in some case they were set above. |
736 prepend_end_doy_norm = data_list[[4]]; | 793 prepend_end_doy_norm = data_list[[4]]; |
737 append_start_doy_norm = data_list[[5]]; | 794 append_start_doy_norm = data_list[[5]]; |
738 is_leap_year = data_list[[6]]; | 795 is_leap_year = data_list[[6]]; |
739 location = data_list[[7]]; | 796 location = data_list[[7]]; |
740 | 797 |
741 if (is.null(input_ytd)) { | |
742 processing_year_to_date_data = FALSE; | |
743 } else { | |
744 processing_year_to_date_data = TRUE; | |
745 } | |
746 # Determine if we're plotting generations separately. | |
747 if (opt$plot_generations_separately=="yes") { | |
748 plot_generations_separately = TRUE; | |
749 } else { | |
750 plot_generations_separately = FALSE; | |
751 } | |
752 if (is.null(opt$start_date) && is.null(opt$end_date)) { | |
753 # We're plotting an entire year. | |
754 restricted_date_interval = FALSE; | |
755 # Display the total number of days in the Galaxy history item blurb. | |
756 if (processing_year_to_date_data) { | |
757 cat("Number of days year-to-date: ", opt$num_days_ytd, "\n"); | |
758 } else { | |
759 if (is_leap_year) { | |
760 num_days = 366; | |
761 } else { | |
762 num_days = 365; | |
763 } | |
764 cat("Number of days in year: ", num_days, "\n"); | |
765 } | |
766 } else { | |
767 # FIXME: currently custom date fields are free text, but | |
768 # Galaxy should soon include support for a date selector | |
769 # at which point this tool should be enhanced to use it. | |
770 # Validate start_date. | |
771 restricted_date_interval = TRUE; | |
772 # Calaculate the number of days in the date interval rather | |
773 # than using the number of rows in the input temperature data. | |
774 start_date = validate_date(opt$start_date); | |
775 # Validate end_date. | |
776 end_date = validate_date(opt$end_date); | |
777 if (start_date >= end_date) { | |
778 stop_err("The start date must be between 1 and 50 days before the end date when setting date intervals for plots."); | |
779 } | |
780 # Calculate the number of days in the date interval. | |
781 num_days = difftime(start_date, end_date, units=c("days")); | |
782 if (num_days > 50) { | |
783 # We need to restrict date intervals since | |
784 # plots render tick marks for each day. | |
785 stop_err("Date intervals for plotting cannot exceed 50 days."); | |
786 } | |
787 # Display the total number of days in the Galaxy history item blurb. | |
788 cat("Number of days in date interval: ", num_days, "\n"); | |
789 } | |
790 | |
791 # Create copies of the temperature data for generations P, F1 and F2 if we're plotting generations separately. | 798 # Create copies of the temperature data for generations P, F1 and F2 if we're plotting generations separately. |
792 if (plot_generations_separately) { | 799 if (plot_generations_separately) { |
793 temperature_data_frame_P = data.frame(temperature_data_frame); | 800 temperature_data_frame_P = data.frame(temperature_data_frame); |
794 temperature_data_frame_F1 = data.frame(temperature_data_frame); | 801 temperature_data_frame_F1 = data.frame(temperature_data_frame); |
795 temperature_data_frame_F2 = data.frame(temperature_data_frame); | 802 temperature_data_frame_F2 = data.frame(temperature_data_frame); |
796 } | 803 } |
797 | 804 |
798 # Get the ticks date labels for plots. | 805 # Get the ticks date labels for plots. |
799 ticks_and_labels = get_x_axis_ticks_and_labels(temperature_data_frame, prepend_end_doy_norm, append_start_doy_norm, restricted_date_interval); | 806 ticks_and_labels = get_x_axis_ticks_and_labels(temperature_data_frame, prepend_end_doy_norm, append_start_doy_norm, date_interval); |
800 ticks = c(unlist(ticks_and_labels[1])); | 807 ticks = c(unlist(ticks_and_labels[1])); |
801 date_labels = c(unlist(ticks_and_labels[2])); | 808 date_labels = c(unlist(ticks_and_labels[2])); |
802 # All latitude values are the same, so get the value for plots from the first row. | 809 # All latitude values are the same, so get the value for plots from the first row. |
803 latitude = temperature_data_frame$LATITUDE[1]; | 810 latitude = temperature_data_frame$LATITUDE[1]; |
804 | 811 |