# HG changeset patch # User recetox # Date 1687787744 0 # Node ID a393c438343617a0bf9124948f071470ddbf967d # Parent 464c1e80a01f8e243e7c0c732a342b6ad1c7f54f planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox-xmsannotator commit 1ab1a1dabfcebe11720de1411927a7438c1b64c1 diff -r 464c1e80a01f -r a393c4383436 macros.xml --- a/macros.xml Fri Jan 28 16:27:30 2022 +0000 +++ b/macros.xml Mon Jun 26 13:55:44 2023 +0000 @@ -1,74 +1,144 @@ - 0.9.0 - - - r-recetox-xmsannotator - - - - - - - - - - - - - mz and rt. - ]]> - - - - compound_id, monoisotopic_mass, and molecular_formula. - ]]> - - - - adduct, charge, mass, and n_molecules. - ]]> - - +0.10.0 + + + + r-recetox-xmsannotator + + + + + + + + + + + + + + + + + + + + + + + + compound_id, monoisotopic_mass, and molecular_formula. + ]]> + + + + adduct, charge, mass, and n_molecules. + ]]> + + + + + A weight-by-adduct table. + + + - - - + + + + + + + + + + + + Mass tolerance in ppm for database matching. + + + + + +Description +=========== + +Annotate the peak intensity table (e.g. from an apLCMS run) with compounds from the compounds database +using advanced methods. + +The annotation process generates all possible compound-adduct pairs and matches those pairs to the measured +peaks. A compound-adduct pair is considered a match to a certain peak when the difference of their masses is +within some tolerance. + +Then, a score and a confidence level are assigned to each match based on peak correlation +clustering, metabolite pathway associations, adduct expectations, and isotope conformations. + +Input tables description +------------------------ + +(*) Metadata table +~~~~~~~~~~~~~~~~~~ - - - - Mass tolerance in ppm for database matching. - - - - - - - - 10.1021/acs.analchem.6b01214 - +The output from the recetox-aplcms tool. +This table contains the peak area for aligned features in all samples.
+ ++-------+------------------------+------------------------+------------------------+ +| id | 21_qc_no_dil_milliq | 29_qc_no_dil_milliq | 8_qc_no_dil_milliq | ++=======+========================+========================+========================+ +| 1 | 13187487.20482895 | 7957395.699119729 | 11700594.397257797 | ++-------+------------------------+------------------------+------------------------+ +| 2 | 2075168.6398983458 | 0 | 2574362.159289044 | ++-------+------------------------+------------------------+------------------------+ +| 57 | 2934524.4406785755 | 1333044.5065971944 | 0 | ++-------+------------------------+------------------------+------------------------+ +| ... | ... | ... | ... | ++-------+------------------------+------------------------+------------------------+ + + + + 10.1021/acs.analchem.6b01214 + diff -r 464c1e80a01f -r a393c4383436 recetox_xmsannotator_advanced.xml --- a/recetox_xmsannotator_advanced.xml Fri Jan 28 16:27:30 2022 +0000 +++ b/recetox_xmsannotator_advanced.xml Mon Jun 26 13:55:44 2023 +0000 @@ -1,43 +1,49 @@ - + + annotate peak intensity table including scores and confidence levels macros.xml + + recetox-xmsannotator + @@ -47,15 +53,11 @@ - Retention time tolerance in seconds for finding peaks derived from the same parent metabolite. + Retention time tolerance in seconds for finding peaks derived from the same parent compound.
- - - - Correlation threshold between peaks to qualify as adducts/isotopes of the same metabolite. @@ -77,8 +79,8 @@ Network type parameter affects how the network's adjacency matrix is created from the correlation matrix (see WGCNA package documentation). - - + +
@@ -89,21 +91,6 @@ cluster. Otherwise, do not account for cluster membership. - - - - Require the presence of certain adducts for a high confidence match. By default, at least the - presence of an M+H adduct is required for a high confidence match. - - - - - - Table of previously validated compounds to boost their scores and confidence levels. - The 1st column of the table must contain IDs of compounds. - The optional 2nd and 3rd columns may contain mz values and retention times. - - @@ -121,17 +108,58 @@ Whether to filter out low-scored multiple matches or not.
+ + + A numeric threshold by which an intensity ratio of two isotopic peaks may differ from their actual abundance ratio. + + + + Maximum difference in mass defect between two peaks of the same compound. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 464c1e80a01f -r a393c4383436 utils.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils.R Mon Jun 26 13:55:44 2023 +0000 @@ -0,0 +1,37 @@ +library(recetox.xmsannotator) +library(dplyr) + +load_table <- function(filename, filetype) { + if (filename == "None") { + return(NULL) + } + if (filetype == "csv") { + return(as.data.frame(read.csv(filename))) + } else { + return(as.data.frame(arrow::read_parquet(filename))) + } +} + +save_table <- function(table, filename, filetype) { + if (filetype == "csv") { + write.csv(table, filename, row.names = FALSE) + } else { + arrow::write_parquet(table, filename) + } +} + +create_filter_by_adducts <- function(comma_separated_values) { + if (comma_separated_values == "None") { + return(NA) + } + filter_by <- strsplit(trimws(comma_separated_values), ",")[[1]] + return(filter_by) +} + +create_peak_table <- function(metadata_table, intensity_table) { + metadata_table <- select(metadata_table, id, mz, rt) + peak_table <- inner_join(metadata_table, intensity_table, by = "id") + peak_table <- rename(peak_table, peak = id) + peak_table$peak <- as.integer(peak_table$peak) + return(peak_table) +}