Mercurial > repos > ecology > aquainfra_ogc_api_processes
changeset 8:aec309f44d47 draft default tip
planemo upload for repository https://github.com/AquaINFRA/tools-ecology/tree/master commit d6d779f958b5d3347769bdc83c885950ae975a42
author | ecology |
---|---|
date | Fri, 15 Aug 2025 11:19:22 +0000 |
parents | c805cb663bac |
children | |
files | aquainfra_ogc_api_processes.R aquainfra_ogc_api_processes.xml macros.xml test-data/points_att_polygon_test_input_2.txt test-data/points_att_polygon_test_input_3.txt |
diffstat | 5 files changed, 148 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/aquainfra_ogc_api_processes.R Mon Jul 21 06:19:08 2025 +0000 +++ b/aquainfra_ogc_api_processes.R Fri Aug 15 11:19:22 2025 +0000 @@ -166,6 +166,11 @@ convertedKeys <- c() for (key in names(inputParameters)) { + value_no_spaces <- gsub(" ", "", inputParameters[[key]]) + if (value_no_spaces == "") { + inputParameters[[key]] <- NULL + } + if (is.character(inputParameters[[key]]) && (endsWith(inputParameters[[key]], ".dat") || endsWith(inputParameters[[key]], ".txt"))) { @@ -180,7 +185,61 @@ inputParameters[[key]] <- json_string convertedKeys <- append(convertedKeys, key) - } else { + } else if ( + grepl("_object", key) && + !is.null(inputParameters[[key]]) && + !is.na(inputParameters[[key]]) && + gsub(" ", "", inputParameters[[key]]) != "" + ) { + decoded_value <- gsub("__oc__", "{", inputParameters[[key]]) + decoded_value <- gsub("__cc__", "}", decoded_value) + decoded_value <- gsub("__ob__", "[", decoded_value) + decoded_value <- gsub("__cb__", "]", decoded_value) + decoded_value <- gsub("__dq__", "\"", decoded_value) # Optional: sometimes used for quotes + decoded_value <- gsub("__cn__", ":", decoded_value) # Optional: used for colon in older versions + + parsed_json <- fromJSON(decoded_value) + convertedKey <- gsub("_object", "", key) + convertedKeys <- append(convertedKeys, convertedKey) + #json_string <- toJSON(parsed_json, auto_unbox = FALSE) + inputParameters[[key]] <- parsed_json + } else if (grepl("_array", key)) { + keyParts <- strsplit(key, split = "_")[[1]] + type <- keyParts[length(keyParts)] + values <- inputParameters[[key]] + if (is.character(values) && grepl(",", values)) { + value_list <- unlist(strsplit(values, split = ",")) + } else if (is.character(values)) { + value_list <- c(values) + } + + convertedValues <- c() + + for (value in value_list) { + value <- as.character(value) + value <- gsub(" ", "", value) + #if(type == "integer") { + # value <- as.integer(value) + #} else if (type == "numeric") { + # value <- as.numeric(balue) + #} else 
if (type == "character") { + # value <- as.character(value) + # value <- gsub(" ", "", value) + #} + convertedValues <- append(convertedValues, value) + convertedKey <- "" + for (part in keyParts) { + if(part == "array") { + break + } + convertedKey <- paste(convertedKey, paste(part, "_", sep=""), sep="") + } + convertedKey <- substr(convertedKey, 1, nchar(convertedKey)-1) + } + inputParameters[[key]] <- convertedValues + convertedKeys <- append(convertedKeys, convertedKey) + } + else { if (!is.null(inputParameters[[key]])) { convertedKeys <- append(convertedKeys, key) }
--- a/aquainfra_ogc_api_processes.xml Mon Jul 21 06:19:08 2025 +0000 +++ b/aquainfra_ogc_api_processes.xml Fri Aug 15 11:19:22 2025 +0000 @@ -1,4 +1,4 @@ -<tool id="aquainfra_ogc_api_processes" name="AquaINFRA OGC API Processes" version="0.6.1" profile="22.05"> +<tool id="aquainfra_ogc_api_processes" name="AquaINFRA OGC API Processes" version="0.7.0" profile="22.05"> <description/> <macros> <import>macros.xml</import> @@ -30,6 +30,11 @@ <option value="tordera-gloria-connection">tordera-gloria-connection: SWAT+ output to MITgcm input connection tool</option> <option value="riverload">riverload: Compute River Load</option> <option value="mitgcm-resultplots">mitgcm-resultplots: Catalunya MITgcm Plotting Tool</option> + <option value="retrieve-biodiversity-data">retrieve-biodiversity-data: Retrieve biodiversity data from the web</option> + <option value="match-data">match-data: Combine and match biodiversity data from separate sources</option> + <option value="check-names">check-names: Check species names in biodiversity data from separate sources</option> + <option value="pred-extract">pred-extract: pred extract for extraction of environmental data</option> + <option value="multidetect-and-clean">multidetect-and-clean: Run multidetect and clean the data</option> </param> <when value="points-att-polygon"> <param name="input_data" label="Table to be merged with study region" optional="false" help="URL to the input table containing the in-situ data points with coordinates. Can be provided as Excel file or CSV file (comma-separated text file). The coordinates have to be in WGS84 coordinate system. (URL must be stored in a .txt file)" type="data" format="txt"/> @@ -163,6 +168,85 @@ <param name="min_var_for_color" label="Minimum value to be considered for color limit" optional="true" help="This is a parameter controlling the color limits to display. Example (for salinity): 35. Leave empty to let the color limits be chosen automatically." 
type="float"/> <param name="max_var_for_color" label="Maximum value to be considered for color limit" optional="true" help="This is a parameter controlling the color limits to display. Example (for salinity): 38. Leave empty to let the color limits be chosen automatically." type="float"/> </when> + <when value="retrieve-biodiversity-data"> + <param name="species_names_list" label="Species names to download (list of species)" optional="true" help="List of species. If you have a url to the dataset, use the option below." type="text"/> + <param name="species_names_url" label="Data with species names to download" optional="true" help="URL to a table containing the occurrences from any source. This can be the outcome of the matchdata process. (URL must be stored in a .txt file)" type="data" format="txt"/> + <param name="colname_species" label="Species column name" optional="true" help="If the data provided in the data path is a dataframe, indicate here the column name with species names." type="text"/> + <param name="databases_array" label="Database names" optional="false" help="List of databases to consider. Only 'gbif, inat, vertnet' supported." type="text"/> + <param name="gbif_limit" label="Max number (GBIF)" optional="true" help="The maximum number of data points that are being retrieved from GBIF." type="integer"/> + <param name="inaturalist_limit" label="Max number (iNaturalist)" optional="true" help="The maximum number of data points that are being retrieved from iNaturalist." type="integer"/> + <param name="vertnet_limit" label="Max number (VertNet)" optional="true" help="The maximum number of data points that are being retrieved from VertNet." 
type="integer"/> + <param name="study_area_bbox_object" label="Area of Interest as a bounding box" optional="true" help="A bounding box: {'bbox': [south, west, north, east]} (see https://docs.ogc.org/is/18-062r2/18-062r2.html#bounding-box-value)" type="text"/> + <param name="study_area_shapefile" label="Area of Interest as shapefile" optional="true" help="URL to the zipped input shapefile containing the study areas inside which you would like to retrieve occurrences. (URL must be stored in a .txt file)" type="data" format="txt"/> + <param name="study_area_geojson_url" label="Area of Interest as GeoJSON" optional="true" help="URL to the input GeoJSON file containing the study areas inside which you would like to retrieve occurrences. (URL must be stored in a .txt file)" type="data" format="txt"/> + <param name="study_area_geojson_object" label="Area of Interest as a GeoJSON object" optional="true" help="Directly post the GeoJSON containing the study areas inside which you would like to retrieve occurrences." type="text"/> + <param name="percentage_correctness" label="Species name correctness in percent." optional="true" help="A number indicating the percentage of species name correctness that should be allowed to be replaced. High values ensure a perfect match from the standard database." type="float"/> + <param name="synonym_check" label="Consider synonyms from standard databases like FishBase" optional="false" help="YES, if synonym species names should be returned from FishBase during data checks before data download." type="boolean" truevalue="True" falsevalue="False"/> + </when> + <when value="match-data"> + <param name="input_datasets" label="Occurrence data (GBIF, iNaturalist, VertNet, or from any other source)" optional="false" help="URL to CSV tables containing the occurrences from GBIF, iNaturalist and VertNet. This can be the outcome of the retrieve biodiversity data process. 
(URL must be stored in a .txt file)" type="data" format="txt"/> + <param name="colnames_species_names_array" label="Column names including species names" optional="false" help="List of column names that indicate the Species Name in each of the provided datasets. Example: 'speciesname, scientificName'" type="text"/> + <param name="colnames_countries_array" label="Column names including country" optional="false" help="List of column names that indicate the Country in each of the provided datasets. Example: 'JDS4_sampling_ID'" type="text"/> + <param name="colnames_lat_array" label="Column names including Latitude" optional="false" help="List of column names that indicate the Latitude in each of the provided datasets. Example: 'lat, latitude'" type="text"/> + <param name="colnames_lon_array" label="Column names including Longitude" optional="false" help="List of column names that indicate the Longitude in each of the provided datasets. Example: 'lon, long, longitude'" type="text"/> + <param name="colnames_date_array" label="Column names including Dates" optional="false" help="List of column names that indicate the dates in each of the provided datasets. Example: 'Date, sampling_date'" type="text"/> + </when> + <when value="check-names"> + <param name="species_names_list" label="Species names (List of species)" optional="true" help="List of species names to check from the standard database (FishBase)." type="text"/> + <param name="species_names_url" label="Species names to check" optional="true" help="URL to a table containing the occurrences from any source. This can be the outcome of the match-data process. (URL must be stored in a .txt file)" type="data" format="txt"/> + <param name="colname_species" label="Column name including species names" optional="true" help="Column name that indicate the Species in the provided dataset. Only required you provide tabular occurrence data. 
Example: 'species'" type="text"/> + <param name="percent_correctness" label="Species name correctness in percent" optional="false" help="A number indicating the percentage of species name correctness that should be allowed to be replaced. High values ensure a perfect match from the standard database." type="float"/> + <param name="bool_merge" label="Merge checked species names on the dataset" optional="false" help="If a dataframe with species names is provided and this is YES, then all columns are returned. For only one species, the merge will be set automatically to false." type="boolean" truevalue="True" falsevalue="False"/> + <param name="bool_synonym" label="Synonym checks" optional="false" help="Return synonym checks from FishBase." type="boolean" truevalue="True" falsevalue="False"/> + <param name="bool_ecosystem_type" label="Output ecosystem types" optional="false" help="Return ecosystem checks for species from FishBase including marine, freshwater, and brackish." type="boolean" truevalue="True" falsevalue="False"/> + <param name="bool_rm_duplicates" label="Remove duplicates" optional="false" help="Remove duplicates from the dataset. Default is FALSE." type="boolean" truevalue="True" falsevalue="False"/> + </when> + <when value="pred-extract"> + <param name="input_data" label="Occurrence data" optional="false" help="URL to a table containing the occurrences from any source. This can be the outcome of the check-names process. (URL must be stored in a .txt file)" type="data" format="txt"/> + <param name="input_raster_url_or_name" label="Input raster" optional="false" help="URL of a cloud-optimized raster to be used (will not be downloaded but accessed remotely), or the name of one of the static rasters on the server. Currently only worldclim is available, for testing purposes." 
type="text"/> + <param name="study_area_extent" label="Area of Interest as a Shapefile" optional="true" help="URL to the input shapefile containing the study areas inside which you would like to delineate the bounding box or extent for study area. (URL must be stored in a .txt file)" type="data" format="txt"/> + <param name="study_area_geojson_url" label="Area of Interest as a GeoJSON" optional="true" help="URL to the input GeoJSON file containing the study areas inside which you would like to delineate the bounding box or extent for study area. (URL must be stored in a .txt file)" type="data" format="txt"/> + <param name="study_area_geojson_object" label="Area of Interest as GeoJSON object" optional="true" help="Directly post the GeoJSON containing the study areas inside which you would like to delineate the bounding box or extent for study area." type="text"/> + <param name="colname_lat" label="Column name including Latitude" optional="false" help="Column name lat. Example: 'decimalLatitude'" type="text"/> + <param name="colname_lon" label="Column name including Longitude" optional="false" help="Column name lon. Example: 'decimalLongitude'" type="text"/> + <param name="colname_species" label="Column name including Species" optional="false" help="Column name species. Example: 'speciescheck'" type="text"/> + <param name="mininmum_sprecords" label="Minimum points to consider from the species dataset" optional="false" help="The minimum number of points in a dataset to allow species retained in the dataset. Example: 10, is species dataset is less than 10 records will be ignored." type="integer"/> + <param name="bool_merge" label="Merge extracted data" optional="false" help="This allows to return the full dataset with merged with extracted data." 
type="boolean" truevalue="True" falsevalue="False"/> + <param name="bool_list" label="Return a list of dataframe for species if they are more than 1" optional="false" help="For multiple species, if set to TRUE, then a list of species dataframes are returned. If false, then a dataframe is returned." type="boolean" truevalue="True" falsevalue="False"/> + <param name="bool_coords" label="Attach the original coordinates" optional="false" help="If YES, the original records used for data extraction will be attached on the dataset. Easy to tract the slight changes in coordinates during data extraction." type="boolean" truevalue="True" falsevalue="False"/> + <param name="bool_remove_nas" label="Remove NAs after data extraction" optional="false" help="If YES, the NAs created when the coordinates did not retrun records and left in the dataset. Default TRUE" type="boolean" truevalue="True" falsevalue="False"/> + <param name="bool_remove_duplicates" label="Remove duplicates" optional="false" help="If TRUE, duplicates records will be removed. Default TRUE" type="boolean" truevalue="True" falsevalue="False"/> + <param name="minimum_sprecordsallow" label="Minimum species records to consider before using data in species dsitribution modeling" optional="false" help="In species distribution models, species with fewer records needs to be removed but normal data extraction this parameter should be set to FALSE to retain all species." type="boolean" truevalue="True" falsevalue="False"/> + </when> + <when value="multidetect-and-clean"> + <param name="input_data" label="Input table" optional="false" help="URL to the input table containing the data to be cleaned from outliers: Data sets for multiple or single species from pred_extract and other sources. 
(URL must be stored in a .txt file)" type="data" format="txt"/> + <param name="colname_variable" label="Variable of interest" optional="false" help="Column name identifying the variable of interest where outliers will be checked from for univariate outlier detection methods such as Z-score, mixed interquantile range, reverse jackknifing" type="text"/> + <param name="select_columns" label="Specify columns to be checked" optional="true" help="In a multivariate dataset, if only particular columns needs to be checked then they should be indicated here. Otherwise all columns will be considerd in outlier detection." type="text"/> + <param name="multiple_species" label="Distinguish between multiple species" optional="false" help="If NO, then only a single species dataset is expected. " type="boolean" truevalue="True" falsevalue="False"/> + <param name="output_type" label="Output type" optional="false" help="Set whether you want to return outliers or clean dataset. Example outlier." type="text"/> + <param name="group_colname" label="Column name including group names" optional="true" help="For multiple groups in a dataframe, provide the column name containing the groups to be checked. For example, a column name with species name in a dataset." type="text"/> + <param name="colname_exclude" label="Column names to exclude" optional="true" help="Exclude mainly numerical variables that are not necessary in the analysis, for example x and y columns or latitude/longitude, row numbers or serial IDs. Categorical variables are removed automatically in the data preparation." type="text"/> + <param name="methods" label="Outlier detection methods" optional="false" help="The name of methods for Outlier detection to be used, as a comma-separated string. Example = 'mixediqr, logboxplot, iqr, distboxplot, jknife, semiqr, hampel, iforest, lof, mahal'." 
type="text"/> + <param name="silence_true_errors" label="Silence methods that genuinely fail during outlier detection" optional="false" help="If YES, Silence errors for methods that genuinely fail during the outlier detection process but continue without breaking other methods." type="boolean" truevalue="True" falsevalue="False"/> + <param name="boot_run" label="Bootstrapping execution" optional="false" help="If set to YES, then bootstrapping will be done for small samples." type="boolean" truevalue="True" falsevalue="False"/> + <param name="boot_maxrecords" label="Maximum records to initiate bootstrapping" optional="true" help="The user can adjust the maximum number of records to be bootstrapped. Default is 30." type="integer"/> + <param name="number_of_boots" label="Number of bootstraps" optional="true" help="The number of bootstraps to generate during bootstrapping. The default is 10." type="integer"/> + <param name="setseed" label="Set seed" optional="false" help="During bootstrapping, random samples are generated, which requires setting a seed for reproducibility." type="integer"/> + <param name="boot_threshold" label="Threshold value to flag a record from bootstrap samples" optional="true" help="As a record is flagged in multiple bootstrap samples, a threshold is required to extract an outlier. Default is 0.6, meaning if a record is flagged in 6 of the 10 bootstraps, it will be flagged as an outlier." type="float"/> + <param name="exceute_pca" label="Execute Principal Component Analysis" optional="false" help="If true, then PCA will be initiated." type="boolean" truevalue="True" falsevalue="False"/> + <param name="number_of_pca" label="Number of principal components to retain" optional="true" help="The user can indicate the maximum number of principal components to retain in the outlier detection. The Default is 5." type="integer"/> + <param name="pca_silence" label="Hide messages during PCA analysis" optional="true" help="Messages during PCA analysis are returned if set to NO. 
Default is YES to silence the messages like the variance explained from the total PCs selected." type="boolean" truevalue="True" falsevalue="False"/> + <param name="pcavariable" label="Select the principal component among the PCs retained to be used as variable of interest" optional="true" help="PC1 is the default variable selected for outlier detection. Other PC2 can can also be set. PC1 is advisable." type="text"/> + <param name="sdm_data" label="Change the outlier detection routine based on data type" optional="false" help="If it is univariate data then, set to NO. All data that requires multivariate analysis such as using kmeans, isolation forest, set to YES. Default is YES." type="boolean" truevalue="True" falsevalue="False"/> + <param name="inform_na_outlier" label="Hide messages for removing NAs" optional="true" help="If set to YES, NAs removed will be displayed for each group variable. Default is NO." type="boolean" truevalue="True" falsevalue="False"/> + <param name="missingness" label="Percentage missing values in a group" optional="true" help="Allowed missing values in a column to allow a user decide whether to remove the individual columns or rows from the data sets. Default 0.1. Therefore, if a column has more than 10 % missing values, then it will be removed from the dataset rather than the rows." type="float"/> + <param name="classify_or_autoremove" label="Either use outlier classification or autoremoval with threshold or LOESS method" optional="false" help="The parameter allows to switch from outlification that labels all records as perfect outlier to fair outliers to allows further scrutiny. Otherwise, the outliers will be dropped based on a threshold set naively or using LOESS method." 
type="boolean" truevalue="True" falsevalue="False"/> + <param name="bool_loess" label="Data extraction parameter: LOESS" optional="true" help="If set to true then the local regression method for data extraction will be used" type="boolean" truevalue="True" falsevalue="False"/> + <param name="threshold_clean" label="Data extraction parameter: Threshold setting" optional="true" help="If Data extraction parameter: LOESS is NO, then a threshold value can be provided. Default is 0.8. The threshold will significantly determine which is flagged as an absolute outlier." type="float"/> + <param name="outlierweights_mode" label="Data extraction parameter: Outlier weighting" optional="true" help="Outlier weighting and selection methods. abs uses record proportional to identify an absolute outlier. Default is abs" type="text"/> + <param name="classifymode" label="Cuts data into classess" optional="true" help="Categorize data base on the correlation coefficient manner based on Akoglu 2018" type="integer"/> + <param name="eif_bool" label="Emprical Influence Function" optional="true" help="Computes the Emprical Influence Function for classified outliers. Default is NO" type="boolean" truevalue="True" falsevalue="False"/> + </when> </conditional> </inputs> <outputs>
--- a/macros.xml Mon Jul 21 06:19:08 2025 +0000 +++ b/macros.xml Fri Aug 15 11:19:22 2025 +0000 @@ -41,7 +41,7 @@ <param name="input_data" value="points_att_polygon_test_input_3.txt"/> <param name="colname_long" value="longitude"/> <param name="colname_lat" value="latitude"/> - <param name="colname_value_name" value="transparency_m"/> + <param name="colname_value_name" value="transparen"/> <param name="colname_region_id" value="HELCOM_ID"/> </conditional> <output name="output_data">
--- a/test-data/points_att_polygon_test_input_2.txt Mon Jul 21 06:19:08 2025 +0000 +++ b/test-data/points_att_polygon_test_input_2.txt Fri Aug 15 11:19:22 2025 +0000 @@ -1,1 +1,1 @@ -https://vm4072.kaj.pouta.csc.fi/ddas/oapif/collections/lva_secchi/items?f=json&limit=3000 \ No newline at end of file +https://vm4072.kaj.pouta.csc.fi/ddas/oapif/collections/lva_secchi/items?f=json&limit=5871 \ No newline at end of file
--- a/test-data/points_att_polygon_test_input_3.txt Mon Jul 21 06:19:08 2025 +0000 +++ b/test-data/points_att_polygon_test_input_3.txt Fri Aug 15 11:19:22 2025 +0000 @@ -1,1 +1,1 @@ -https://aquainfra.ogc.igb-berlin.de/download/out/data_merged_with_regions-acd86cde-dfae-11ef-93fb-fa163e42fba0.csv \ No newline at end of file +https://aquainfra.ogc.igb-berlin.de/download/out/data_merged_with_regions-8355b3d0-79bb-11f0-9ce7-fa163e42fba0.csv \ No newline at end of file