Mercurial > repos > ecology > ecoregion_brt_analysis

--- a/BRT_model.xml	Wed Oct 16 11:44:56 2024 +0000
+++ b/BRT_model.xml	Wed May 28 10:10:52 2025 +0000
@@ -1,4 +1,4 @@
-<tool id="ecoregion_brt_analysis" name="BRT tool prediction" version="0.1.0+galaxy0" profile="22.05">
+<tool id="ecoregion_brt_analysis" name="BRT tool prediction" version="0.1.2+galaxy0" profile="23.2">
     <description>for species distribution modelling</description>
     <requirements>
        <requirement type="package" version="4.3.3">r-base</requirement>
@@ -22,13 +22,13 @@
     <inputs>
       <param name="enviro" type="data" format="tabular" label="Input your environment data file of your study area (tabular format only)" help="See example below"/>
       <param name="dec_env" type="select" label="What's the decimal separator of your environement data file ?">
-              <option value=".">Dot</option>
-              <option value=",">Comma</option>
+              <option value="Dot">Dot</option>
+              <option value="Comma">Comma</option>
       </param>
       <param type="data" name="species_files" label="Input your occurrences data file(s) containing also the environemental caracteristics where the species has been observe (tabular format only)" format="tabular" multiple="True" help="See example below"/>
-      <param name="dec_species" type="select" label="What's the decimal separator of your occurrences data file(s) ?" help="It must be the same for all your occurences data files" >
-              <option value=".">Dot</option>
-              <option value=",">Comma</option>
+      <param name="dec_species" type="select" label="What's the decimal separator of your occurrences data file(s) ?" help="It must be the same for all your occurences data files">
+              <option value="Dot">Dot</option>
+              <option value="Comma">Comma</option>
       </param>
       <param name="abioticname" type="data_column" label="Choose column(s) where your abiotic parameter are in your environment data file." data_ref="enviro" multiple="true" use_header_names="true"/>
     </inputs>
@@ -112,5 +112,3 @@
        <citation type="doi">10.32614/CRAN.package.ggplot2</citation>
     </citations>
 </tool>
-
-
--- a/brt.R	Wed Oct 16 11:44:56 2024 +0000
+++ b/brt.R	Wed May 28 10:10:52 2025 +0000
@@ -21,8 +21,8 @@
     enviro <- args[1]
     species_files <- args[2]
     abio_para <- args[3]
-    dec_env <- args[8]
-    dec_species <- args[9]
+    dec_env <- ifelse(args[8]=="Dot", ".", ",")
+    dec_species <- ifelse(args[9]=="Dot", ".", ",")
 }

 ### load data
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/claraguess.R	Wed May 28 10:10:52 2025 +0000
@@ -0,0 +1,75 @@
+##30/04/2025
+##Jean Le Cras
+### Clustering with Clara algorithm with an option to determine the optimal number of clusters based on SIH index
+
+#load libraries
+library(cluster)
+library(dplyr)
+library(tidyverse)
+
+#load arguments
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args)==0) {
+  stop("This tool needs at least one argument")
+}
+
+#load data
+enviro_path <- args[1]
+preds_path <- args[2]
+taxas_path <- args[3]
+type <- args[4]
+k <- as.integer(args[5])
+metric <- args[6]
+samples <- as.integer(args[7])
+env.data <- read.table(enviro_path, sep = "\t", header = TRUE, dec = ".", na.strings = "-9999")
+
+data_split = str_split(preds_path, ",")
+preds.data = NULL
+
+for (i in 1:length(data_split[[1]])) {
+  df <- read.table(data_split[[1]][i], dec=".", sep="\t", header=T, na.strings="NA")
+  preds.data <- rbind(preds.data, df)
+  remove(df)
+}
+
+names(preds.data) <- c("lat", "long", "pred", "taxa")
+
+development_traits <- str_split(readLines(taxas_path), "\t")
+
+#select the clara model with the optimal number of clusters
+model <- NULL
+
+if (type == "auto") {
+  sih_scores <- c()
+  models <- list()
+
+  for (i in 2:k) {
+    models[[i]] <- clara(preds.data$pred, i, metric = metric, samples = samples, stand = TRUE)
+    sih_scores[i] <- models[[i]]$silinfo$avg.width
+  }
+  png("sih_scores.png")
+  plot(2:k, sih_scores[2:k], type = "b", xlab = "Number of clusters", ylab = "SIH index")
+  dev.off()
+
+  best_k <- which.max(sih_scores[3:k]) + 2
+  model <- models[[best_k]]
+  k <- best_k
+} else {
+  model <- clara(preds.data$pred, k, metric = metric, samples = samples, stand = TRUE)
+}
+
+#saving results
+png("silhouette_plot.png")
+plot(silhouette(model), main = paste("Silhouette plot for k =", k))
+dev.off()
+
+data.test <- matrix(preds.data$pred, nrow = nrow(env.data), ncol = nrow(preds.data) / nrow(env.data))
+data.test <- data.frame(data.test)
+names(data.test) <- unique(preds.data$development)
+
+full.data <- cbind(preds.data[1:nrow(data.test), 1:2], model$clustering)
+names(full.data) <- c("lat", "long", "cluster")
+full.data <- cbind(full.data, data.test, env.data[, 3:ncol(env.data)])
+
+write.table(full.data[1:3], file = "data_cluster.tabular", quote = FALSE, sep = "\t", row.names = FALSE)
+write.table(full.data, file = "clustered_taxas_env.tabular", quote = FALSE, sep = "\t", row.names = FALSE)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/enviro.tabular	Wed May 28 10:10:52 2025 +0000
@@ -0,0 +1,11 @@
+"long"	"lat"	"Carbo"	"Grav"
+"152250"	145.35	-66.91	1.06	0.92
+"12010"	139.91	-65.75	1.6	25.77
+"88982"	139.8	-66.34	1.16	2.91
+"109687"	144.67	-66.48	1.23	13.74
+"124999"	142.27	-66.59	3.31	3.13
+"119451"	141.04	-66.55	2.99	3.31
+"128864"	140.8	-66.62	2.76	2.95
+"125833"	142.26	-66.6	3.3	3.12
+"113403"	144.09	-66.5	1.4	8.95
+"56804"	140.83	-66.11	2.33	9.83
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/preds.tabular	Wed May 28 10:10:52 2025 +0000
@@ -0,0 +1,11 @@
+"lat"	"long"	"Prediction.index"	"spe"
+"148207"	-66.83	144.41	0.127117028637464	"Development_direct.development"
+"175359"	-65.83	141.48	0.000126665227113262	"Development_lecithotrophic"
+"226550"	-66.2	144.88	4.3718554237373e-05	"Development_lecithotrophic"
+"132855"	-66.66	142.98	0.512219435270445	"Development_direct.development"
+"380560"	-66.19	145.47	0.455627207042825	"Development_planktotrophic"
+"361759"	-66.06	140.61	0.529983873632234	"Development_planktotrophic"
+"345639"	-65.95	140.92	0.471266742957099	"Development_planktotrophic"
+"27420"	-65.9	144.56	0.0224605950764499	"Development_direct.development"
+"207288"	-66.07	143.27	4.36554587548896e-05	"Development_lecithotrophic"
+"349308"	-65.98	140.02	0.402308937370468	"Development_planktotrophic"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/taxas.tabular	Wed May 28 10:10:52 2025 +0000
@@ -0,0 +1,3 @@
+Development_direct.development
+Development_lecithotrophic
+Development_planktotrophic