# HG changeset patch
# User recetox
# Date 1687787744 0
# Node ID a393c438343617a0bf9124948f071470ddbf967d
# Parent 464c1e80a01f8e243e7c0c732a342b6ad1c7f54f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox-xmsannotator commit 1ab1a1dabfcebe11720de1411927a7438c1b64c1
diff -r 464c1e80a01f -r a393c4383436 macros.xml
--- a/macros.xml Fri Jan 28 16:27:30 2022 +0000
+++ b/macros.xml Mon Jun 26 13:55:44 2023 +0000
@@ -1,74 +1,144 @@
- 0.9.0
-
-
- r-recetox-xmsannotator
-
-
-
-
-
-
-
-
-
-
-
-
- mz and rt.
- ]]>
-
-
-
- compound_id, monoisotopic_mass, and molecular_formula.
- ]]>
-
-
-
- adduct, charge, mass, and n_molecules.
- ]]>
-
-
+0.10.0
+
+
+
+ r-recetox-xmsannotator
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ compound_id, monoisotopic_mass, and molecular_formula.
+ ]]>
+
+
+
+ adduct, charge, mass, and n_molecules.
+ ]]>
+
+
+
+
+ A weight-by-adduct table.
+
+
+
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+ Mass tolerance in ppm for database matching.
+
+
+
+
+
+Description
+===========
+
+Annotate the peak intensity table (e.g. from an apLCMS run) with compounds from the compounds database
+using advanced methods.
+
+The annotation process generates all possible compound-adduct pairs and matches those pairs to the measured
+peaks. A compound-adduct pair is declared a match to a certain peak when the difference of their masses is
+within some tolerance.
+
+Then, a score and a confidence level are assigned to each match based on peak correlation
+clustering, metabolite pathway associations, adducts expectations, and isotope conformations.
+
+Input tables description
+------------------------
+
+(*) Metadata table
+~~~~~~~~~~~~~~~~~~
-
-
-
- Mass tolerance in ppm for database matching.
-
-
-
-
-
-
-
- 10.1021/acs.analchem.6b01214
-
+The output from the recetox-aplcms tool.
+This table contains the peak area for aligned features in all samples.
+
++-------+------------------------+------------------------+------------------------+
+| id | 21_qc_no_dil_milliq | 29_qc_no_dil_milliq | 8_qc_no_dil_milliq |
++=======+========================+========================+========================+
+| 1 | 13187487.20482895 | 7957395.699119729 | 11700594.397257797 |
++-------+------------------------+------------------------+------------------------+
+| 2 | 2075168.6398983458 | 0 | 2574362.159289044 |
++-------+------------------------+------------------------+------------------------+
+| 57 | 2934524.4406785755 | 1333044.5065971944 | 0 |
++-------+------------------------+------------------------+------------------------+
+| ... | ... | ... | ... |
++-------+------------------------+------------------------+------------------------+
+
+
+
+ 10.1021/acs.analchem.6b01214
+
diff -r 464c1e80a01f -r a393c4383436 recetox_xmsannotator_advanced.xml
--- a/recetox_xmsannotator_advanced.xml Fri Jan 28 16:27:30 2022 +0000
+++ b/recetox_xmsannotator_advanced.xml Mon Jun 26 13:55:44 2023 +0000
@@ -1,43 +1,49 @@
-
+
+
annotate peak intensity table including scores and confidence levels
macros.xml
+
+ recetox-xmsannotator
+
@@ -47,15 +53,11 @@
- Retention time tolerance in seconds for finding peaks derived from the same parent metabolite.
+ Retention time tolerance in seconds for finding peaks derived from the same parent compound.
-
-
-
-
Correlation threshold between peaks to qualify as adducts/isotopes of the same metabolite.
@@ -77,8 +79,8 @@
Network type parameter affects how the network's adjacency matrix is created from the correlation
matrix (see WGCNA package documentation).
-
-
+
+
@@ -89,21 +91,6 @@
cluster. Otherwise, do not account for cluster membership.
-
-
-
- Require the presence of certain adducts for a high confidence match. By default, at least the
- presence of an M+H adduct is required for a high confidence match.
-
-
-
-
-
- Table of previously validated compounds to boost their scores and confidence levels.
- The 1st column of the table must contain IDs of compounds.
- The optional 2nd and 3rd columns may contain mz values and retention times.
-
-
@@ -121,17 +108,58 @@
Whether to filter out low-scored multiple matcher or not.
+
+
+ A numeric threshold by which an intensity ratio of two isotopic peaks may differ from their actual abundance ratio.
+
+
+
+ Maximum difference in mass defect between two peaks of the same compound.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 464c1e80a01f -r a393c4383436 utils.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.R Mon Jun 26 13:55:44 2023 +0000
@@ -0,0 +1,37 @@
+library(recetox.xmsannotator)
+library(dplyr)
+
+# Read `filename` into a data.frame: as CSV when `filetype` is "csv", as
+# Parquet (via arrow) otherwise. The string "None" stands for "no file" -> NULL.
+# NOTE(review): read.csv uses default check.names; confirm that is intended.
+load_table <- function(filename, filetype) {
+  if (filename == "None") {
+    return(NULL)
+  }
+  reader <- if (filetype == "csv") read.csv else arrow::read_parquet
+  as.data.frame(reader(filename))
+}
+
+# Write `table` to `filename`: CSV without row names when `filetype` is
+# "csv", Parquet via arrow for every other `filetype` value.
+save_table <- function(table, filename, filetype) {
+  to_parquet <- filetype != "csv"
+  if (to_parquet) return(arrow::write_parquet(table, filename))
+  write.csv(table, filename, row.names = FALSE)
+}
+
+# Split a comma-separated adduct list into a character vector, trimming
+# whitespace around each entry (so "M+H, M+Na" yields "M+H" and "M+Na").
+# The sentinel string "None" means no filter was given and yields NA.
+create_filter_by_adducts <- function(comma_separated_values) {
+  if (comma_separated_values == "None") return(NA)
+  trimws(strsplit(comma_separated_values, ",", fixed = TRUE)[[1]])
+}
+
+# Join id/mz/rt metadata to intensities on "id"; key becomes integer `peak`.
+create_peak_table <- function(metadata_table, intensity_table) {
+  peak_info <- select(metadata_table, id, mz, rt)
+  peaks <- rename(inner_join(peak_info, intensity_table, by = "id"), peak = id)
+  peaks[["peak"]] <- as.integer(peaks[["peak"]])
+  peaks
+}