Mercurial > repos > ecology > obis_data
comparison analyze.r @ 0:ed3688d04c12 draft default tip
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/obisindicators commit 13ac67c0a21d742b29e6273cdff058560abad770
| author | ecology |
|---|---|
| date | Tue, 05 Nov 2024 14:17:28 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:ed3688d04c12 |
|---|---|
| 1 #' Calculate Biodiversity Indicators, including ES50 (Hurlbert index) | |
| 2 #' | |
| 3 #' Calculate the expected number of marine species in a random sample of 50 | |
| 4 #' individuals (records) | |
| 5 #' | |
| 6 #' @param df data frame with unique species observations containing columns: | |
| 7 #' `cell`, `species`, `records` | |
| 8 #' @param esn expected number of marine species | |
| 9 #' | |
| 10 #' @return Data frame with the following extra columns: - `n`: number of records | |
| 11 #' - `sp`: species richness - `shannon`: Shannon index - `simpson`: Simpson | |
| 12 #' index - `es`: Hurlbert index (n = 50), i.e. expected species from 50 | |
| 13 #' samples ES(50) - `hill_1`: Hill number `exp(shannon)` - `hill_2`: Hill | |
| 14 #' number `1/simpson` - `hill_inf`: Hill number `1/maxp` | |
| 15 #' | |
| 16 #' @details The expected number of marine species in a random sample of 50 | |
| 17 #' individuals (records) is an indicator on marine biodiversity richness. The | |
| 18 #' ES50 is defined in OBIS as the `sum(esi)` over all species of the following | |
| 19 #' per species calculation: | |
| 20 #' | |
| 21 #' - when `n - ni >= 50 (with n as the total number of records in the cell and | |
| 22 #' ni the total number of records for the ith-species) | |
| 23 #' - `esi = 1 - exp(lngamma(n-ni+1) + lngamma(n-50+1) - lngamma(n-ni-50+1) - lngamma(n+1))` | |
| 24 #' | |
| 25 #' - when `n >= 50` - `esi = 1` | |
| 26 #' | |
| 27 #' - else - `esi = NULL` | |
| 28 #' | |
| 29 #' Warning: ES50 assumes that individuals are randomly distributed, the sample | |
| 30 #' size is sufficiently large, the samples are taxonomically similar, and that | |
| 31 #' all of the samples have been taken in the same manner. | |
| 32 #' | |
| 33 #' @export | |
| 34 #' @concept analyze | |
| 35 #' @examples | |
| 36 #' @importFrom gsl lngamma | |
| 37 calc_indicators <- function(df, esn = 50) { | |
| 38 | |
| 39 stopifnot(is.data.frame(df)) | |
| 40 stopifnot(is.numeric(esn)) | |
| 41 stopifnot(all(c("cell", "species", "records") %in% names(df))) | |
| 42 | |
| 43 df %>% | |
| 44 dplyr::group_by(cell, species) %>% | |
| 45 dplyr::summarize( | |
| 46 ni = sum(records), | |
| 47 .groups = "drop_last") %>% | |
| 48 dplyr::mutate(n = sum(ni)) %>% | |
| 49 dplyr::group_by(cell, species) %>% | |
| 50 dplyr::mutate( | |
| 51 hi = -(ni / n * log(ni / n)), | |
| 52 si = (ni / n)^2, | |
| 53 qi = ni / n, | |
| 54 esi = dplyr::case_when( | |
| 55 n - ni >= esn ~ 1 - exp(gsl::lngamma(n - ni + 1) + gsl::lngamma(n - esn + 1) - gsl::lngamma(n - ni - esn + 1) - gsl::lngamma(n + 1)), | |
| 56 n >= esn ~ 1 | |
| 57 ) | |
| 58 ) %>% | |
| 59 dplyr::group_by(cell) %>% | |
| 60 dplyr::summarize( | |
| 61 n = sum(ni), | |
| 62 sp = dplyr::n(), | |
| 63 shannon = sum(hi), | |
| 64 simpson = sum(si), | |
| 65 maxp = max(qi), | |
| 66 es = sum(esi), | |
| 67 .groups = "drop") %>% | |
| 68 dplyr::mutate( | |
| 69 hill_1 = exp(shannon), | |
| 70 hill_2 = 1 / simpson, | |
| 71 hill_inf = 1 / maxp) | |
| 72 } |
