Mercurial > repos > ecology > ecoregion_brt_analysis
changeset 3:d63d74194100 draft default tip
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
author | ecology |
---|---|
date | Wed, 28 May 2025 10:10:52 +0000 |
parents | 44dcc1ae3489 |
children | |
files | BRT_model.xml brt.R claraguess.R test-data/enviro.tabular test-data/preds.tabular test-data/taxas.tabular |
diffstat | 6 files changed, 108 insertions(+), 10 deletions(-) [+] |
line wrap: on
line diff
--- a/BRT_model.xml Wed Oct 16 11:44:56 2024 +0000 +++ b/BRT_model.xml Wed May 28 10:10:52 2025 +0000 @@ -1,4 +1,4 @@ -<tool id="ecoregion_brt_analysis" name="BRT tool prediction" version="0.1.0+galaxy0" profile="22.05"> +<tool id="ecoregion_brt_analysis" name="BRT tool prediction" version="0.1.2+galaxy0" profile="23.2"> <description>for species distribution modelling</description> <requirements> <requirement type="package" version="4.3.3">r-base</requirement> @@ -22,13 +22,13 @@ <inputs> <param name="enviro" type="data" format="tabular" label="Input your environment data file of your study area (tabular format only)" help="See example below"/> <param name="dec_env" type="select" label="What's the decimal separator of your environement data file ?"> - <option value=".">Dot</option> - <option value=",">Comma</option> + <option value="Dot">Dot</option> + <option value="Comma">Comma</option> </param> <param type="data" name="species_files" label="Input your occurrences data file(s) containing also the environemental caracteristics where the species has been observe (tabular format only)" format="tabular" multiple="True" help="See example below"/> - <param name="dec_species" type="select" label="What's the decimal separator of your occurrences data file(s) ?" help="It must be the same for all your occurences data files" > - <option value=".">Dot</option> - <option value=",">Comma</option> + <param name="dec_species" type="select" label="What's the decimal separator of your occurrences data file(s) ?" help="It must be the same for all your occurences data files"> + <option value="Dot">Dot</option> + <option value="Comma">Comma</option> </param> <param name="abioticname" type="data_column" label="Choose column(s) where your abiotic parameter are in your environment data file." data_ref="enviro" multiple="true" use_header_names="true"/> </inputs> @@ -112,5 +112,3 @@ <citation type="doi">10.32614/CRAN.package.ggplot2</citation> </citations> </tool> - -
--- a/brt.R Wed Oct 16 11:44:56 2024 +0000 +++ b/brt.R Wed May 28 10:10:52 2025 +0000 @@ -21,8 +21,8 @@ enviro <- args[1] species_files <- args[2] abio_para <- args[3] - dec_env <- args[8] - dec_species <- args[9] + dec_env <- ifelse(args[8]=="Dot", ".", ",") + dec_species <- ifelse(args[9]=="Dot", ".", ",") } ### load data
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/claraguess.R Wed May 28 10:10:52 2025 +0000 @@ -0,0 +1,75 @@ +##30/04/2025 +##Jean Le Cras +### Clustering with Clara algorithm with an option to determine the optimal number of clusters based on SIH index + +#load libraries +library(cluster) +library(dplyr) +library(tidyverse) + +#load arguments +args <- commandArgs(trailingOnly = TRUE) +if (length(args)==0) { + stop("This tool needs at least one argument") +} + +#load data +enviro_path <- args[1] +preds_path <- args[2] +taxas_path <- args[3] +type <- args[4] +k <- as.integer(args[5]) +metric <- args[6] +samples <- as.integer(args[7]) +env.data <- read.table(enviro_path, sep = "\t", header = TRUE, dec = ".", na.strings = "-9999") + +data_split = str_split(preds_path, ",") +preds.data = NULL + +for (i in 1:length(data_split[[1]])) { + df <- read.table(data_split[[1]][i], dec=".", sep="\t", header=T, na.strings="NA") + preds.data <- rbind(preds.data, df) + remove(df) +} + +names(preds.data) <- c("lat", "long", "pred", "taxa") + +development_traits <- str_split(readLines(taxas_path), "\t") + +#select the clara model with the optimal number of clusters +model <- NULL + +if (type == "auto") { + sih_scores <- c() + models <- list() + + for (i in 2:k) { + models[[i]] <- clara(preds.data$pred, i, metric = metric, samples = samples, stand = TRUE) + sih_scores[i] <- models[[i]]$silinfo$avg.width + } + png("sih_scores.png") + plot(2:k, sih_scores[2:k], type = "b", xlab = "Number of clusters", ylab = "SIH index") + dev.off() + + best_k <- which.max(sih_scores[3:k]) + 2 + model <- models[[best_k]] + k <- best_k +} else { + model <- clara(preds.data$pred, k, metric = metric, samples = samples, stand = TRUE) +} + +#saving results +png("silhouette_plot.png") +plot(silhouette(model), main = paste("Silhouette plot for k =", k)) +dev.off() + +data.test <- matrix(preds.data$pred, nrow = nrow(env.data), ncol = nrow(preds.data) / nrow(env.data)) +data.test <- data.frame(data.test) +names(data.test) <- unique(preds.data$development) + +full.data <- cbind(preds.data[1:nrow(data.test), 1:2], model$clustering) +names(full.data) <- c("lat", "long", "cluster") +full.data <- cbind(full.data, data.test, env.data[, 3:ncol(env.data)]) + +write.table(full.data[1:3], file = "data_cluster.tabular", quote = FALSE, sep = "\t", row.names = FALSE) +write.table(full.data, file = "clustered_taxas_env.tabular", quote = FALSE, sep = "\t", row.names = FALSE) \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/enviro.tabular Wed May 28 10:10:52 2025 +0000 @@ -0,0 +1,11 @@ +"long" "lat" "Carbo" "Grav" +"152250" 145.35 -66.91 1.06 0.92 +"12010" 139.91 -65.75 1.6 25.77 +"88982" 139.8 -66.34 1.16 2.91 +"109687" 144.67 -66.48 1.23 13.74 +"124999" 142.27 -66.59 3.31 3.13 +"119451" 141.04 -66.55 2.99 3.31 +"128864" 140.8 -66.62 2.76 2.95 +"125833" 142.26 -66.6 3.3 3.12 +"113403" 144.09 -66.5 1.4 8.95 +"56804" 140.83 -66.11 2.33 9.83
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/preds.tabular Wed May 28 10:10:52 2025 +0000 @@ -0,0 +1,11 @@ +"lat" "long" "Prediction.index" "spe" +"148207" -66.83 144.41 0.127117028637464 "Development_direct.development" +"175359" -65.83 141.48 0.000126665227113262 "Development_lecithotrophic" +"226550" -66.2 144.88 4.3718554237373e-05 "Development_lecithotrophic" +"132855" -66.66 142.98 0.512219435270445 "Development_direct.development" +"380560" -66.19 145.47 0.455627207042825 "Development_planktotrophic" +"361759" -66.06 140.61 0.529983873632234 "Development_planktotrophic" +"345639" -65.95 140.92 0.471266742957099 "Development_planktotrophic" +"27420" -65.9 144.56 0.0224605950764499 "Development_direct.development" +"207288" -66.07 143.27 4.36554587548896e-05 "Development_lecithotrophic" +"349308" -65.98 140.02 0.402308937370468 "Development_planktotrophic"