Mercurial > repos > ecology > regionalgam_plot_trend
comparison dennis-gam-initial-functions.R @ 0:a4ac846c28ab draft default tip
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/regionalgam commit ffe42225fff8992501b743ebe2c78e50fddc4a4e
| author | ecology |
|---|---|
| date | Thu, 20 Jun 2019 04:01:18 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a4ac846c28ab |
|---|---|
| 1 ### R-Script Adapted from script provided by the CEH, UK BY: Reto Schmucki [ reto.schmucki@mail.mcgill.ca] | |
| 2 ### DATE: 14 July 2014 function to run two stage model in DENNIS et al. 2013 | |
| 3 | |
| 4 | |
| 5 .onAttach <- function(libname, pkgname) { | |
| 6 packageStartupMessage(" The regionalGAM package that is no longer maintained, \n use the new rbms package instead. \n | |
| 7 devtools::install_github(\"RetoSchmucki/rbms\", force=TRUE)") | |
| 8 } | |
| 9 | |
| 10 | |
| 11 #' year_day_func Function | |
| 12 #' This function generate the full sequence of days, months and include the observation to that file. | |
| 13 #' @param sp_data A data.frame with your observation. | |
| 14 #' @keywords year days | |
| 15 #' @export | |
| 16 #' @author Reto Schmucki | |
| 17 #' @examples | |
| 18 #' year_day_func() | |
| 19 | |
| 20 | |
| 21 # FUNCTIONS | |
| 22 | |
| 23 year_day_func = function(sp_data) { | |
| 24 | |
| 25 year <- unique(sp_data$YEAR) | |
| 26 | |
| 27 origin.d <- paste(year, "01-01", sep = "-") | |
| 28 if ((year%%4 == 0) & ((year%%100 != 0) | (year%%400 == 0))) { | |
| 29 nday <- 366 | |
| 30 } else { | |
| 31 nday <- 365 | |
| 32 } | |
| 33 | |
| 34 date.serie <- as.POSIXlt(seq(as.Date(origin.d), length = nday, by = "day"), format = "%Y-%m-%d") | |
| 35 | |
| 36 dayno <- as.numeric(julian(date.serie, origin = as.Date(origin.d)) + 1) | |
| 37 month <- as.numeric(strftime(date.serie, format = "%m")) | |
| 38 week <- as.numeric(strftime(date.serie, format = "%W")) | |
| 39 week_day <- as.numeric(strftime(date.serie, format = "%u")) | |
| 40 day <- as.numeric(strftime(date.serie, format = "%d")) | |
| 41 | |
| 42 site_list <- sp_data[!duplicated(sp_data$SITE), c("SITE")] | |
| 43 | |
| 44 all_day_site <- data.frame(SPECIES = sp_data$SPECIES[1], SITE = rep(site_list, rep(nday, length(site_list))), | |
| 45 YEAR = sp_data$YEAR[1], MONTH = month, WEEK = week, DAY = day, DAY_WEEK = week_day, DAYNO = dayno, | |
| 46 COUNT = NA) | |
| 47 | |
| 48 count_index <- match(paste(sp_data$SITE, sp_data$DAYNO, sep = "_"), paste(all_day_site$SITE, all_day_site$DAYNO, | |
| 49 sep = "_")) | |
| 50 all_day_site$COUNT[count_index] <- sp_data$COUNT | |
| 51 site_count_length <- aggregate(sp_data$COUNT, by = list(sp_data$SITE), function(x) list(1:length(x))) | |
| 52 names(site_count_length$x) <- as.character(site_count_length$Group.1) | |
| 53 site_countno <- utils::stack(site_count_length$x) | |
| 54 all_day_site$COUNTNO <- NA | |
| 55 all_day_site$COUNTNO[count_index] <- site_countno$values # add count number to ease extraction of single count | |
| 56 | |
| 57 # Add zero to close observation season two weeks before and after the first and last | |
| 58 first_obs <- min(all_day_site$DAYNO[!is.na(all_day_site$COUNT)]) | |
| 59 last_obs <- max(all_day_site$DAYNO[!is.na(all_day_site$COUNT)]) | |
| 60 | |
| 61 closing_season <- c((first_obs - 11):(first_obs - 7), (last_obs + 7):(last_obs + 11)) | |
| 62 | |
| 63 # If closing season is before day 1 or day 365, simply set the first and last 5 days to 0 | |
| 64 if (min(closing_season) < 1) | |
| 65 closing_season[1:5] <- c(1:5) | |
| 66 if (max(closing_season) > nday) | |
| 67 closing_season[6:10] <- c((nday - 4):nday) | |
| 68 | |
| 69 all_day_site$COUNT[all_day_site$DAYNO %in% closing_season] <- 0 | |
| 70 all_day_site$ANCHOR <- 0 | |
| 71 all_day_site$ANCHOR[all_day_site$DAYNO %in% closing_season] <- 1 | |
| 72 | |
| 73 all_day_site <- all_day_site[order(all_day_site$SITE, all_day_site$DAYNO), ] | |
| 74 | |
| 75 return(all_day_site) | |
| 76 } | |
| 77 | |
| 78 | |
| 79 #' trap_area Function | |
| 80 #' | |
| 81 #' This function compute the area under the curve using the trapezoid method. | |
| 82 #' @param x A vector or a two columns matrix. | |
| 83 #' @param y A vector, Default is NULL | |
| 84 #' @keywords trapezoid | |
| 85 #' @export | |
| 86 #' @examples | |
| 87 #' trap_area() | |
| 88 | |
| 89 | |
| 90 trap_area = function(x, y = NULL) { | |
| 91 # If y is null and x has multiple columns then set y to x[,2] and x to x[,1] | |
| 92 if (is.null(y)) { | |
| 93 if (length(dim(x)) == 2) { | |
| 94 y = x[, 2] | |
| 95 x = x[, 1] | |
| 96 } else { | |
| 97 stop("ERROR: need to either specifiy both x and y or supply a two column data.frame/matrix to x") | |
| 98 } | |
| 99 } | |
| 100 | |
| 101 # Check x and y are same length | |
| 102 if (length(x) != length(y)) { | |
| 103 stop("ERROR: x and y need to be the same length") | |
| 104 } | |
| 105 | |
| 106 # Need to exclude any pairs that are NA for either x or y | |
| 107 rm_inds = which(is.na(x) | is.na(y)) | |
| 108 if (length(rm_inds) > 0) { | |
| 109 x = x[-rm_inds] | |
| 110 y = y[-rm_inds] | |
| 111 } | |
| 112 | |
| 113 # Determine values of trapezoids under curve Get inds | |
| 114 inds = 1:(length(x) - 1) | |
| 115 # Determine area using trapezoidal rule Area = ( (b1 + b2)/2 ) * h where b1 and b2 are lengths of bases | |
| 116 # (the parallel sides) and h is the height (the perpendicular distance between two bases) | |
| 117 areas = ((y[inds] + y[inds + 1])/2) * diff(x) | |
| 118 | |
| 119 # total area is sum of all trapezoid areas | |
| 120 tot_area = sum(areas) | |
| 121 | |
| 122 # Return total area | |
| 123 return(tot_area) | |
| 124 } | |
| 125 | |
| 126 | |
| 127 #' trap_index Function | |
| 128 #' | |
| 129 #' This function compute the area under the curve (Abundance Index) across species, sites and years | |
| 130 #' @param sp_data A data.frame containing species count data generated from the year_day_func() | |
| 131 #' @param y A vector, Default is NULL | |
| 132 #' @keywords Abundance index | |
| 133 #' @export | |
| 134 #' @examples | |
| 135 #' trap_index() | |
| 136 | |
| 137 | |
| 138 | |
| 139 trap_index = function(sp_data, data_col = "IMP", time_col = "DAYNO", by_col = c("SPECIES", "SITE", "YEAR")) { | |
| 140 | |
| 141 # Build output data.frame | |
| 142 out_obj = unique(sp_data[, by_col]) | |
| 143 # Set row.names to be equal to collapsing of output rows (will be unique, you need them to make uploading | |
| 144 # values back to data.frame will be easier) | |
| 145 row.names(out_obj) = apply(out_obj, 1, paste, collapse = "_") | |
| 146 | |
| 147 # Using this row.names from out_obj above as index in by function to loop through values all unique combs | |
| 148 # of by_cols and fit trap_area to data | |
| 149 ind_dat = by(sp_data[, c(time_col, data_col)], apply(sp_data[, by_col], 1, paste, collapse = "_"), trap_area) | |
| 150 | |
| 151 # Add this data to output object | |
| 152 out_obj[names(ind_dat), "SINDEX"] = round(ind_dat/7, 1) | |
| 153 | |
| 154 # Set row.names to defaults | |
| 155 row.names(out_obj) = NULL | |
| 156 | |
| 157 # Return output object | |
| 158 return(out_obj) | |
| 159 } | |
| 160 | |
| 161 | |
| 162 #' flight_curve Function | |
| 163 #' This function compute the flight curve across and years | |
| 164 #' @param your_dataset A data.frame containing original species count data. The data format is a csv (comma "," separated) with 6 columns with headers, namely "species","transect_id","visit_year","visit_month","visit_day","sp_count" | |
| 165 #' @keywords standardize flight curve | |
| 166 #' @export | |
| 167 #' @examples | |
| 168 #' flight_curve() | |
| 169 | |
| 170 | |
| 171 flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur = 1) { | |
| 172 | |
| 173 if("mgcv" %in% installed.packages() == "FALSE") { | |
| 174 print("mgcv package is not installed.") | |
| 175 x <- readline("Do you want to install it? Y/N") | |
| 176 if (x == 'Y') { | |
| 177 install.packages("mgcv") | |
| 178 } | |
| 179 if (x == 'N') { | |
| 180 stop("flight curve can not be computed without the mgcv package, sorry") | |
| 181 } | |
| 182 } | |
| 183 | |
| 184 flight_pheno <- data.frame() | |
| 185 | |
| 186 your_dataset$DAYNO <- strptime(paste(your_dataset$DAY, your_dataset$MONTH, | |
| 187 your_dataset$YEAR, sep = "/"), "%d/%m/%Y")$yday + 1 | |
| 188 dataset <- your_dataset[, c("SPECIES", "SITE", "YEAR", "MONTH", | |
| 189 "DAY", "DAYNO", "COUNT")] | |
| 190 sample_year <- unique(dataset$YEAR) | |
| 191 sample_year <- sample_year[order(sample_year)] | |
| 192 | |
| 193 if (length(sample_year) >1 ) { | |
| 194 for (y in sample_year) { | |
| 195 dataset_y <- dataset[dataset$YEAR == y, ] | |
| 196 | |
| 197 # subset sites with enough visit and occurence | |
| 198 occ <- aggregate(dataset_y$COUNT, by = list(SITE = dataset_y$SITE), function(x) sum(x > 0)) | |
| 199 vis <- aggregate(dataset_y$COUNT, by = list(SITE = dataset_y$SITE), function(x) length(x)) | |
| 200 dataset_y <- dataset_y[dataset_y$SITE %in% occ$SITE[occ$x >= MinOccur], ] | |
| 201 dataset_y <- dataset_y[dataset_y$SITE %in% vis$SITE[vis$x >= MinVisit], ] | |
| 202 nsite <- length(unique(dataset_y$SITE)) | |
| 203 if (nsite < 1) { | |
| 204 print(paste("No sites with sufficient visits and occurence, MinOccur:", MinOccur, " MinVisit: ", MinVisit, " for " , dataset$SPECIES[1],"at year", y)) | |
| 205 next | |
| 206 } | |
| 207 # Determine missing days and add to dataset | |
| 208 sp_data_all <- year_day_func(dataset_y) | |
| 209 if (nsite > 200) { | |
| 210 sp_data_all <- sp_data_all[as.character(sp_data_all$SITE) %in% as.character(unique(dataset_y$SITE)[sample(1:nsite, | |
| 211 200, replace = F)]), ] | |
| 212 sp_data_all <- sp_data_all | |
| 213 } | |
| 214 sp_data_all$trimDAYNO <- sp_data_all$DAYNO - min(sp_data_all$DAYNO) + 1 | |
| 215 print(paste("Fitting the GAM for",as.character(sp_data_all$SPECIES[1]),"and year",y,"with",length(unique(sp_data_all$SITE)),"sites :",Sys.time())) | |
| 216 if(length(unique(sp_data_all$SITE))>1){ | |
| 217 gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") + as.factor(SITE) -1, | |
| 218 data = sp_data_all, family = GamFamily), silent = TRUE) | |
| 219 } | |
| 220 else { | |
| 221 gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") -1, | |
| 222 data = sp_data_all, family = GamFamily), silent = TRUE) | |
| 223 } | |
| 224 # Give a second try if the GAM does not converge the first time | |
| 225 if (class(gam_obj_site)[1] == "try-error") { | |
| 226 # Determine missing days and add to dataset | |
| 227 sp_data_all <- year_day_func(dataset_y) | |
| 228 if (nsite > 200) { | |
| 229 sp_data_all <- sp_data_all[as.character(sp_data_all$SITE) %in% as.character(unique(dataset_y$SITE)[sample(1:nsite, | |
| 230 200, replace = F)]), ] | |
| 231 } | |
| 232 else { | |
| 233 sp_data_all <- sp_data_all | |
| 234 } | |
| 235 sp_data_all$trimDAYNO <- sp_data_all$DAYNO - min(sp_data_all$DAYNO) + 1 | |
| 236 print(paste("Fitting the GAM for",sp_data_all$SPECIES[1],"at year", y,"with",length(unique(sp_data_all$SITE)),"sites :",Sys.time(),"second try")) | |
| 237 if(length(unique(sp_data_all$SITE))>1){ | |
| 238 gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") + as.factor(SITE) -1, | |
| 239 data = sp_data_all, family = GamFamily), silent = TRUE) | |
| 240 } | |
| 241 else { | |
| 242 gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") -1, | |
| 243 data = sp_data_all, family = GamFamily), silent = TRUE) | |
| 244 } | |
| 245 if (class(gam_obj_site)[1] == "try-error") { | |
| 246 print(paste("Error in fitting the flight period for",sp_data_all$SPECIES[1],"at year", y,"no convergence after two trial")) | |
| 247 sp_data_all[, "FITTED"] <- NA | |
| 248 sp_data_all[, "COUNT_IMPUTED"] <- NA | |
| 249 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA | |
| 250 sp_data_all[, "NM"] <- NA | |
| 251 } | |
| 252 else { | |
| 253 # Generate a list of values for all days from the additive model and use | |
| 254 # these value to fill the missing observations | |
| 255 sp_data_all[, "FITTED"] <- mgcv::predict.gam(gam_obj_site, newdata = sp_data_all[, | |
| 256 c("trimDAYNO", "SITE")], type = "response") | |
| 257 # force zeros at the beginning end end of the year | |
| 258 sp_data_all[sp_data_all$trimDAYNO < 60, "FITTED"] <- 0 | |
| 259 sp_data_all[sp_data_all$trimDAYNO > 305, "FITTED"] <- 0 | |
| 260 # if infinite number in predict replace with NA. | |
| 261 if(sum(is.infinite(sp_data_all[, "FITTED"]))>0){ | |
| 262 print(paste("Error in the flight period for",sp_data_all$SPECIES[1],"at year", y,"weird predicted values")) | |
| 263 sp_data_all[, "FITTED"] <- NA | |
| 264 sp_data_all[, "COUNT_IMPUTED"] <- NA | |
| 265 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA | |
| 266 sp_data_all[, "NM"] <- NA | |
| 267 } | |
| 268 else { | |
| 269 sp_data_all[, "COUNT_IMPUTED"] <- sp_data_all$COUNT | |
| 270 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- sp_data_all$FITTED[is.na(sp_data_all$COUNT)] | |
| 271 # Define the flight curve from the fitted values and append them over | |
| 272 # years (this is one flight curve per year for all site) | |
| 273 site_sums <- aggregate(sp_data_all$FITTED, by = list(SITE = sp_data_all$SITE), | |
| 274 FUN = sum) | |
| 275 # Rename sum column | |
| 276 names(site_sums)[names(site_sums) == "x"] <- "SITE_YR_FSUM" | |
| 277 # Add data to sp_data data.frame (ensure merge does not sort the data!) | |
| 278 sp_data_all = merge(sp_data_all, site_sums, by <- c("SITE"), | |
| 279 all = TRUE, sort = FALSE) | |
| 280 # Calculate normalized values | |
| 281 sp_data_all[, "NM"] <- sp_data_all$FITTED/sp_data_all$SITE_YR_FSUM | |
| 282 } | |
| 283 } | |
| 284 } | |
| 285 else { | |
| 286 # Generate a list of values for all days from the additive model and use | |
| 287 # these value to fill the missing observations | |
| 288 sp_data_all[, "FITTED"] <- mgcv::predict.gam(gam_obj_site, newdata = sp_data_all[, | |
| 289 c("trimDAYNO", "SITE")], type = "response") | |
| 290 # force zeros at the beginning end end of the year | |
| 291 sp_data_all[sp_data_all$trimDAYNO < 60, "FITTED"] <- 0 | |
| 292 sp_data_all[sp_data_all$trimDAYNO > 305, "FITTED"] <- 0 | |
| 293 # if infinite number in predict replace with NA. | |
| 294 if(sum(is.infinite(sp_data_all[, "FITTED"]))>0){ | |
| 295 print(paste("Error in the flight period for",sp_data_all$SPECIES[1],"at year", y,"weird predicted values")) | |
| 296 sp_data_all[, "FITTED"] <- NA | |
| 297 sp_data_all[, "COUNT_IMPUTED"] <- NA | |
| 298 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA | |
| 299 sp_data_all[, "NM"] <- NA | |
| 300 } | |
| 301 else { | |
| 302 sp_data_all[, "COUNT_IMPUTED"] <- sp_data_all$COUNT | |
| 303 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- sp_data_all$FITTED[is.na(sp_data_all$COUNT)] | |
| 304 # Define the flight curve from the fitted values and append them over | |
| 305 # years (this is one flight curve per year for all site) | |
| 306 site_sums = aggregate(sp_data_all$FITTED, by = list(SITE = sp_data_all$SITE), | |
| 307 FUN = sum) | |
| 308 # Rename sum column | |
| 309 names(site_sums)[names(site_sums) == "x"] = "SITE_YR_FSUM" | |
| 310 # Add data to sp_data data.frame (ensure merge does not sort the data!) | |
| 311 sp_data_all = merge(sp_data_all, site_sums, by = c("SITE"), all = TRUE, | |
| 312 sort = FALSE) | |
| 313 # Calculate normalized values | |
| 314 sp_data_all[, "NM"] = sp_data_all$FITTED/sp_data_all$SITE_YR_FSUM | |
| 315 } | |
| 316 } | |
| 317 sp_data_filled <- sp_data_all | |
| 318 flight_curve <- data.frame(species = sp_data_filled$SPECIES, year = sp_data_filled$YEAR, | |
| 319 week = sp_data_filled$WEEK, DAYNO = sp_data_filled$DAYNO, DAYNO_adj = sp_data_filled$trimDAYNO, | |
| 320 nm = sp_data_filled$NM)[!duplicated(paste(sp_data_filled$YEAR, | |
| 321 sp_data_filled$DAYNO, sep = "_")), ] | |
| 322 flight_curve <- flight_curve[order(flight_curve$DAYNO), ] | |
| 323 # bind if exist else create | |
| 324 if (is.na(flight_curve$nm[1])) next() | |
| 325 | |
| 326 flight_pheno <- rbind(flight_pheno, flight_curve) | |
| 327 | |
| 328 } # end of year loop | |
| 329 } | |
| 330 else { | |
| 331 y <- unique(dataset$YEAR) | |
| 332 dataset_y <- dataset[dataset$YEAR == y, ] | |
| 333 # subset sites with enough visit and occurence | |
| 334 occ <- aggregate(dataset_y$COUNT, by = list(SITE = dataset_y$SITE), function(x) sum(x > 0)) | |
| 335 vis <- aggregate(dataset_y$COUNT, by = list(SITE = dataset_y$SITE), function(x) length(x)) | |
| 336 dataset_y <- dataset_y[dataset_y$SITE %in% occ$SITE[occ$x >= MinOccur], ] | |
| 337 dataset_y <- dataset_y[dataset_y$SITE %in% vis$SITE[vis$x >= MinVisit], ] | |
| 338 nsite <- length(unique(dataset_y$SITE)) | |
| 339 if (nsite < 1) { | |
| 340 stop(paste("No sites with sufficient visits and occurence, MinOccur:", MinOccur, " MinVisit: ", MinVisit, " for " ,dataset$SPECIES[1],"at year", y)) | |
| 341 } | |
| 342 # Determine missing days and add to dataset | |
| 343 sp_data_all <- year_day_func(dataset_y) | |
| 344 if (nsite > 200) { | |
| 345 sp_data_all <- sp_data_all[as.character(sp_data_all$SITE) %in% as.character(unique(dataset_y$SITE)[sample(1:nsite, | |
| 346 200, replace = F)]), ] | |
| 347 } | |
| 348 else { | |
| 349 sp_data_all <- sp_data_all | |
| 350 } | |
| 351 sp_data_all$trimDAYNO <- sp_data_all$DAYNO - min(sp_data_all$DAYNO) + 1 | |
| 352 print(paste("Fitting the GAM for",sp_data_all$SPECIES[1],"at year", y,":",Sys.time())) | |
| 353 if(length(unique(sp_data_all$SITE))>1){ | |
| 354 gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") + as.factor(SITE) -1, | |
| 355 data = sp_data_all, family = GamFamily), silent = TRUE) | |
| 356 } | |
| 357 else { | |
| 358 gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") -1, | |
| 359 data = sp_data_all, family = GamFamily), silent = TRUE) | |
| 360 } | |
| 361 # Give a second try if the GAM does not converge the first time | |
| 362 if (class(gam_obj_site)[1] == "try-error") { | |
| 363 # Determine missing days and add to dataset | |
| 364 sp_data_all <- year_day_func(dataset_y) | |
| 365 if (nsite > 200) { | |
| 366 sp_data_all <- sp_data_all[as.character(sp_data_all$SITE) %in% as.character(unique(dataset_y$SITE)[sample(1:nsite, | |
| 367 200, replace = F)]), ] | |
| 368 } | |
| 369 else { | |
| 370 sp_data_all <- sp_data_all | |
| 371 } | |
| 372 sp_data_all$trimDAYNO <- sp_data_all$DAYNO - min(sp_data_all$DAYNO) + 1 | |
| 373 print(paste("Fitting the GAM for",sp_data_all$SPECIES[1],"at year", y,"with",length(unique(sp_data_all$SITE)),"sites :",Sys.time(),"second try")) | |
| 374 if(length(unique(sp_data_all$SITE))>1){ | |
| 375 gam_obj_site <- try(mgcv::bam(COUNT ~ s(trimDAYNO, bs = "cr") + as.factor(SITE) - 1, | |
| 376 data = sp_data_all, family = GamFamily), silent = TRUE) | |
| 377 } | |
| 378 else { | |
| 379 gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") -1, | |
| 380 data = sp_data_all, family = GamFamily), silent = TRUE) | |
| 381 } | |
| 382 if (class(gam_obj_site)[1] == "try-error") { | |
| 383 print(paste("Error in fitting the flight period for",sp_data_all$SPECIES[1],"at year", y,"no convergence after two trial")) | |
| 384 sp_data_all[, "FITTED"] <- NA | |
| 385 sp_data_all[, "COUNT_IMPUTED"] <- NA | |
| 386 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA | |
| 387 sp_data_all[, "NM"] <- NA | |
| 388 } | |
| 389 else { | |
| 390 # Generate a list of values for all days from the additive model and use | |
| 391 # these value to fill the missing observations | |
| 392 sp_data_all[, "FITTED"] <- mgcv::predict.gam(gam_obj_site, newdata = sp_data_all[, | |
| 393 c("trimDAYNO", "SITE")], type = "response") | |
| 394 # force zeros at the beginning end end of the year | |
| 395 sp_data_all[sp_data_all$trimDAYNO < 60, "FITTED"] <- 0 | |
| 396 sp_data_all[sp_data_all$trimDAYNO > 305, "FITTED"] <- 0 | |
| 397 # if infinite number in predict replace with NA. | |
| 398 if(sum(is.infinite(sp_data_all[, "FITTED"]))>0){ | |
| 399 print(paste("Error in the flight period for",sp_data_all$SPECIES[1],"at year", y,"weird predicted values")) | |
| 400 sp_data_all[, "FITTED"] <- NA | |
| 401 sp_data_all[, "COUNT_IMPUTED"] <- NA | |
| 402 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA | |
| 403 sp_data_all[, "NM"] <- NA | |
| 404 } | |
| 405 else { | |
| 406 sp_data_all[, "COUNT_IMPUTED"] <- sp_data_all$COUNT | |
| 407 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- sp_data_all$FITTED[is.na(sp_data_all$COUNT)] | |
| 408 # Define the flight curve from the fitted values and append them over | |
| 409 # years (this is one flight curve per year for all site) | |
| 410 site_sums <- aggregate(sp_data_all$FITTED, by = list(SITE = sp_data_all$SITE), | |
| 411 FUN = sum) | |
| 412 # Rename sum column | |
| 413 names(site_sums)[names(site_sums) == "x"] <- "SITE_YR_FSUM" | |
| 414 # Add data to sp_data data.frame (ensure merge does not sort the data!) | |
| 415 sp_data_all = merge(sp_data_all, site_sums, by <- c("SITE"), | |
| 416 all = TRUE, sort = FALSE) | |
| 417 # Calculate normalized values | |
| 418 sp_data_all[, "NM"] <- sp_data_all$FITTED/sp_data_all$SITE_YR_FSUM | |
| 419 } | |
| 420 } | |
| 421 } | |
| 422 else { | |
| 423 # Generate a list of values for all days from the additive model and use | |
| 424 # these value to fill the missing observations | |
| 425 sp_data_all[, "FITTED"] <- mgcv::predict.gam(gam_obj_site, newdata = sp_data_all[, | |
| 426 c("trimDAYNO", "SITE")], type = "response") | |
| 427 # force zeros at the beginning end end of the year | |
| 428 sp_data_all[sp_data_all$trimDAYNO < 60, "FITTED"] <- 0 | |
| 429 sp_data_all[sp_data_all$trimDAYNO > 305, "FITTED"] <- 0 | |
| 430 # if infinite number in predict replace with NA. | |
| 431 if(sum(is.infinite(sp_data_all[, "FITTED"]))>0){ | |
| 432 print(paste("Error in the flight period for",sp_data_all$SPECIES[1],"at year", y,"weird predicted values")) | |
| 433 sp_data_all[, "FITTED"] <- NA | |
| 434 sp_data_all[, "COUNT_IMPUTED"] <- NA | |
| 435 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA | |
| 436 sp_data_all[, "NM"] <- NA | |
| 437 } | |
| 438 else { | |
| 439 sp_data_all[, "COUNT_IMPUTED"] <- sp_data_all$COUNT | |
| 440 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- sp_data_all$FITTED[is.na(sp_data_all$COUNT)] | |
| 441 # Define the flight curve from the fitted values and append them over | |
| 442 # years (this is one flight curve per year for all site) | |
| 443 site_sums = aggregate(sp_data_all$FITTED, by = list(SITE = sp_data_all$SITE), | |
| 444 FUN = sum) | |
| 445 # Rename sum column | |
| 446 names(site_sums)[names(site_sums) == "x"] = "SITE_YR_FSUM" | |
| 447 # Add data to sp_data data.frame (ensure merge does not sort the data!) | |
| 448 sp_data_all = merge(sp_data_all, site_sums, by = c("SITE"), all = TRUE, | |
| 449 sort = FALSE) | |
| 450 # Calculate normalized values | |
| 451 sp_data_all[, "NM"] = sp_data_all$FITTED/sp_data_all$SITE_YR_FSUM | |
| 452 } | |
| 453 } | |
| 454 sp_data_filled <- sp_data_all | |
| 455 flight_curve <- data.frame(species = sp_data_filled$SPECIES, year = sp_data_filled$YEAR, | |
| 456 week = sp_data_filled$WEEK, DAYNO = sp_data_filled$DAYNO, DAYNO_adj = sp_data_filled$trimDAYNO, | |
| 457 nm = sp_data_filled$NM)[!duplicated(paste(sp_data_filled$YEAR, | |
| 458 sp_data_filled$DAYNO, sep = "_")), ] | |
| 459 flight_curve <- flight_curve[order(flight_curve$DAYNO), ] | |
| 460 | |
| 461 flight_pheno <- rbind(flight_pheno, flight_curve) | |
| 462 | |
| 463 } | |
| 464 return(flight_pheno) | |
| 465 } | |
| 466 | |
| 467 | |
| 468 #' abundance_index Function | |
| 469 #' | |
| 470 #' This function compute the Abundance Index across sites and years from your dataset and the regional flight curve | |
| 471 #' @param your_dataset A data.frame containing original species count data. The data format is a csv (comma "," separated) with 6 columns with headers, namely "species","transect_id","visit_year","visit_month","visit_day","sp_count" | |
| 472 #' @param flight_pheno A data.frame for the regional flight curve computed with the function flight_curve | |
| 473 #' @keywords standardize flight curve | |
| 474 #' @export | |
| 475 #' @examples | |
| 476 #' abundance_index() | |
| 477 | |
| 478 abundance_index <- function(your_dataset,flight_pheno) { | |
| 479 | |
| 480 your_dataset$DAYNO <- strptime(paste(your_dataset$DAY, your_dataset$MONTH, | |
| 481 your_dataset$YEAR, sep = "/"), "%d/%m/%Y")$yday + 1 | |
| 482 | |
| 483 dataset <- your_dataset[, c("SPECIES", "SITE", "YEAR", "MONTH", | |
| 484 "DAY", "DAYNO", "COUNT")] | |
| 485 | |
| 486 sample_year <- unique(dataset$YEAR) | |
| 487 sample_year <- sample_year[order(sample_year)] | |
| 488 | |
| 489 | |
| 490 if (length(sample_year)>1){ | |
| 491 | |
| 492 for (y in sample_year) { | |
| 493 | |
| 494 year_pheno <- flight_pheno[flight_pheno$year == y, ] | |
| 495 | |
| 496 dataset_y <- dataset[dataset$YEAR == y, ] | |
| 497 | |
| 498 sp_data_site <- year_day_func(dataset_y) | |
| 499 sp_data_site$trimDAYNO <- sp_data_site$DAYNO - min(sp_data_site$DAYNO) + 1 | |
| 500 | |
| 501 sp_data_site <- merge(sp_data_site, year_pheno[, c("DAYNO", "nm")], | |
| 502 by = c("DAYNO"), all.x = TRUE, sort = FALSE) | |
| 503 | |
| 504 # compute proportion of the flight curve sampled due to missing visits | |
| 505 pro_missing_count <- data.frame(SITE = sp_data_site$SITE, WEEK = sp_data_site$WEEK, | |
| 506 NM = sp_data_site$nm, COUNT = sp_data_site$COUNT, ANCHOR = sp_data_site$ANCHOR) | |
| 507 pro_missing_count$site_week <- paste(pro_missing_count$SITE, pro_missing_count$WEEK, | |
| 508 sep = "_") | |
| 509 siteweeknocount <- aggregate(pro_missing_count$COUNT, by = list(pro_missing_count$site_week), | |
| 510 function(x) sum(!is.na(x)) == 0) | |
| 511 pro_missing_count <- pro_missing_count[pro_missing_count$site_week %in% | |
| 512 siteweeknocount$Group.1[siteweeknocount$x == TRUE], ] | |
| 513 pro_count_agg <- aggregate(pro_missing_count$NM, by = list(pro_missing_count$SITE), | |
| 514 function(x) 1 - sum(x, na.rm = T)) | |
| 515 names(pro_count_agg) <- c("SITE", "PROP_PHENO_SAMPLED") | |
| 516 | |
| 517 # remove samples outside the monitoring window | |
| 518 sp_data_site$COUNT[sp_data_site$nm==0] <- NA | |
| 519 | |
| 520 # Compute the regional GAM index | |
| 521 | |
| 522 if(length(unique(sp_data_site$SITE))>1){ | |
| 523 glm_obj_site <- glm(COUNT ~ factor(SITE) + offset(log(nm)) - 1, data = sp_data_site, | |
| 524 family = quasipoisson(link = "log"), control = list(maxit = 100)) | |
| 525 } else { | |
| 526 glm_obj_site <- glm(COUNT ~ offset(log(nm)) - 1, data = sp_data_site, | |
| 527 family = quasipoisson(link = "log"), control = list(maxit = 100)) | |
| 528 } | |
| 529 | |
| 530 sp_data_site[, "FITTED"] <- predict.glm(glm_obj_site, newdata = sp_data_site, | |
| 531 type = "response") | |
| 532 sp_data_site[, "COUNT_IMPUTED"] <- sp_data_site$COUNT | |
| 533 sp_data_site[is.na(sp_data_site$COUNT), "COUNT_IMPUTED"] <- sp_data_site$FITTED[is.na(sp_data_site$COUNT)] | |
| 534 | |
| 535 ## add fitted value for missing mid-week data | |
| 536 sp_data_site <- sp_data_site[!paste(sp_data_site$DAY_WEEK, sp_data_site$COUNT) %in% | |
| 537 c("1 NA", "2 NA", "3 NA", "5 NA", "6 NA", "7 NA"), ] | |
| 538 | |
| 539 ## remove all added mid-week values for weeks with real count | |
| 540 ## (observation) | |
| 541 sp_data_site$site_week <- paste(sp_data_site$SITE, sp_data_site$WEEK, | |
| 542 sep = "_") | |
| 543 siteweekcount <- aggregate(sp_data_site$COUNT, by = list(sp_data_site$site_week), | |
| 544 function(x) sum(!is.na(x)) > 0) | |
| 545 sp_data_site <- sp_data_site[!(is.na(sp_data_site$COUNT) & (sp_data_site$site_week %in% | |
| 546 siteweekcount$Group.1[siteweekcount$x == TRUE])), names(sp_data_site) != | |
| 547 "site_week"] | |
| 548 | |
| 549 ## Compute the regional GAM index | |
| 550 print(paste("Compute index for",sp_data_site$SPECIES[1],"at year", y,"for",length(unique(sp_data_site$SITE)),"sites:",Sys.time())) | |
| 551 regional_gam_index <- trap_index(sp_data_site, data_col = "COUNT_IMPUTED", | |
| 552 time_col = "DAYNO", by_col = c("SPECIES", "SITE", "YEAR")) | |
| 553 | |
| 554 cumu_index <- merge(regional_gam_index, pro_count_agg, by = c("SITE"), | |
| 555 all.x = TRUE, sort = FALSE) | |
| 556 names(cumu_index) <- c("SITE", "SPECIES", "YEAR", "regional_gam", "prop_pheno_sampled") | |
| 557 | |
| 558 cumu_index <- cumu_index[order(cumu_index$SITE), ] | |
| 559 | |
| 560 # bind if exist else create | |
| 561 if ("cumullated_indices" %in% ls()) { | |
| 562 cumullated_indices <- rbind(cumullated_indices, cumu_index) | |
| 563 } else { | |
| 564 cumullated_indices <- cumu_index | |
| 565 } | |
| 566 | |
| 567 } # end of year loop | |
| 568 | |
| 569 } else { | |
| 570 | |
| 571 y <- unique(dataset$YEAR) | |
| 572 year_pheno <- flight_pheno[flight_pheno$year == y, ] | |
| 573 | |
| 574 dataset_y <- dataset[dataset$YEAR == y, ] | |
| 575 | |
| 576 sp_data_site <- year_day_func(dataset_y) | |
| 577 sp_data_site$trimDAYNO <- sp_data_site$DAYNO - min(sp_data_site$DAYNO) + 1 | |
| 578 | |
| 579 sp_data_site <- merge(sp_data_site, year_pheno[, c("DAYNO", "nm")], | |
| 580 by = c("DAYNO"), all.x = TRUE, sort = FALSE) | |
| 581 | |
| 582 # compute proportion of the flight curve sampled due to missing visits | |
| 583 pro_missing_count <- data.frame(SITE = sp_data_site$SITE, WEEK = sp_data_site$WEEK, | |
| 584 NM = sp_data_site$nm, COUNT = sp_data_site$COUNT, ANCHOR = sp_data_site$ANCHOR) | |
| 585 pro_missing_count$site_week <- paste(pro_missing_count$SITE, pro_missing_count$WEEK, | |
| 586 sep = "_") | |
| 587 siteweeknocount <- aggregate(pro_missing_count$COUNT, by = list(pro_missing_count$site_week), | |
| 588 function(x) sum(!is.na(x)) == 0) | |
| 589 pro_missing_count <- pro_missing_count[pro_missing_count$site_week %in% | |
| 590 siteweeknocount$Group.1[siteweeknocount$x == TRUE], ] | |
| 591 pro_count_agg <- aggregate(pro_missing_count$NM, by = list(pro_missing_count$SITE), | |
| 592 function(x) 1 - sum(x, na.rm = T)) | |
| 593 names(pro_count_agg) <- c("SITE", "PROP_PHENO_SAMPLED") | |
| 594 | |
| 595 # remove samples outside the monitoring window | |
| 596 sp_data_site$COUNT[sp_data_site$nm==0] <- NA | |
| 597 | |
| 598 # Compute the regional GAM index | |
| 599 if(length(unique(sp_data_site$SITE))>1){ | |
| 600 glm_obj_site <- glm(COUNT ~ factor(SITE) + offset(log(nm)) - 1, data = sp_data_site, | |
| 601 family = quasipoisson(link = "log"), control = list(maxit = 100)) | |
| 602 } else { | |
| 603 glm_obj_site <- glm(COUNT ~ offset(log(nm)) - 1, data = sp_data_site, | |
| 604 family = quasipoisson(link = "log"), control = list(maxit = 100)) | |
| 605 } | |
| 606 | |
| 607 sp_data_site[, "FITTED"] <- predict.glm(glm_obj_site, newdata = sp_data_site, | |
| 608 type = "response") | |
| 609 sp_data_site[, "COUNT_IMPUTED"] <- sp_data_site$COUNT | |
| 610 sp_data_site[is.na(sp_data_site$COUNT), "COUNT_IMPUTED"] <- sp_data_site$FITTED[is.na(sp_data_site$COUNT)] | |
| 611 | |
| 612 # add fitted value for missing mid-week data | |
| 613 sp_data_site <- sp_data_site[!paste(sp_data_site$DAY_WEEK, sp_data_site$COUNT) %in% | |
| 614 c("1 NA", "2 NA", "3 NA", "5 NA", "6 NA", "7 NA"), ] | |
| 615 | |
| 616 # remove all added mid-week values for weeks with real count | |
| 617 # (observation) | |
| 618 sp_data_site$site_week <- paste(sp_data_site$SITE, sp_data_site$WEEK, | |
| 619 sep = "_") | |
| 620 siteweekcount <- aggregate(sp_data_site$COUNT, by = list(sp_data_site$site_week), | |
| 621 function(x) sum(!is.na(x)) > 0) | |
| 622 sp_data_site <- sp_data_site[!(is.na(sp_data_site$COUNT) & (sp_data_site$site_week %in% | |
| 623 siteweekcount$Group.1[siteweekcount$x == TRUE])), names(sp_data_site) != | |
| 624 "site_week"] | |
| 625 | |
| 626 # Compute the regional gam index | |
| 627 print(paste("Compute index for",sp_data_site$SPECIES[1],"at year", y,"for",length(unique(sp_data_site$SITE)),"sites:",Sys.time())) | |
| 628 regional_gam_index <- trap_index(sp_data_site, data_col = "COUNT_IMPUTED", | |
| 629 time_col = "DAYNO", by_col = c("SPECIES", "SITE", "YEAR")) | |
| 630 | |
| 631 cumu_index <- merge(regional_gam_index, pro_count_agg, by = c("SITE"), | |
| 632 all.x = TRUE, sort = FALSE) | |
| 633 names(cumu_index) <- c("SITE", "SPECIES", "YEAR", "regional_gam", "prop_pheno_sampled") | |
| 634 | |
| 635 cumu_index <- cumu_index[order(cumu_index$SITE), ] | |
| 636 | |
| 637 # bind if exist else create | |
| 638 if ("cumullated_indices" %in% ls()) { | |
| 639 cumullated_indices <- rbind(cumullated_indices, cumu_index) | |
| 640 } else { | |
| 641 cumullated_indices <- cumu_index | |
| 642 } | |
| 643 | |
| 644 } | |
| 645 | |
| 646 return(cumullated_indices) | |
| 647 | |
| 648 } |
