changeset 12:ae1044bcf13d draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e87d28ea433cc26db7fe44768685d08c06f7a0d0"
author eschen42
date Tue, 15 Mar 2022 18:17:19 +0000
parents 302918bd77e0
children 28a126da9b28
files macros.xml mqppep_anova.R mqppep_anova_script.Rmd repository_dependencies.xml test-data/alpha_levels.tabular test-data/test_input_for_anova.tabular workflow/ppenrich_suite_wf.ga
diffstat 7 files changed, 1 insertions(+), 1718 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Tue Mar 15 12:44:04 2022 +0000
+++ b/macros.xml	Tue Mar 15 18:17:19 2022 +0000
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@TOOL_VERSION@">0.1.2</token>
+    <token name="@TOOL_VERSION@">0.1.3</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <xml name="requirements">
         <requirements>
--- a/mqppep_anova.R	Tue Mar 15 12:44:04 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,207 +0,0 @@
-#!/usr/bin/env Rscript
-# libraries
-library(optparse)
-library(data.table)
-library(stringr)
-# bioconductor-preprocesscore
-#  - libopenblas
-#  - r-data.table
-#  - r-rmarkdown
-#  - r-ggplot2
-#  - texlive-core
-
-# ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285
-
-# parse options
-# Command-line options accepted by this script.
-option_list <- list(
-  make_option(
-    c("-i", "--inputFile"),
-    action = "store",
-    default = NA,
-    type = "character",
-    help = "Phosphopeptide Intensities sparse input file path"
-  ),
-  make_option(
-    c("-a", "--alphaFile"),
-    action = "store",
-    default = NA,
-    type = "character",
-    help = paste0("List of alpha cutoff values for significance testing;",
-             " path to text file having one column and no header")
-  ),
-  make_option(
-    c("-f", "--firstDataColumn"),
-    action = "store",
-    default = "10",
-    type = "character",
-    help = "First column of intensity values"
-  ),
-  make_option(
-    c("-m", "--imputationMethod"),
-    action = "store",
-    default = "group-median",
-    type = "character",
-    help = paste0("Method for missing-value imputation,",
-             " one of c('group-median','median','mean','random')")
-  ),
-  make_option(
-    c("-p", "--meanPercentile"),
-    action = "store",
-    default = 3,
-    type = "integer",
-    help = paste0("Mean percentile for randomly generated imputed values;",
-              " range [1,99]")
-  ),
-  make_option(
-    c("-d", "--sdPercentile"),
-    action = "store",
-    default = 3,
-    type = "double",
-    help = paste0("Adjustment value for standard deviation of",
-              " randomly generated imputed values; real")
-  ),
-  make_option(
-    c("-s", "--regexSampleNames"),
-    action = "store",
-    default = "\\.(\\d+)[A-Z]$",
-    type = "character",
-    help = paste0("Regular expression extracting sample-names,",
-             " or path to a file containing that expression")
-  ),
-  make_option(
-    c("-g", "--regexSampleGrouping"),
-    action = "store",
-    default = "(\\d+)",
-    type = "character",
-    help = paste0("Regular expression extracting sample-group",
-             " from an extracted sample-name,",
-             " or path to a file containing that expression")
-  ),
-  make_option(
-    c("-o", "--imputedDataFile"),
-    action = "store",
-    default = "output_imputed.tsv",
-    type = "character",
-    help = "Imputed Phosphopeptide Intensities output file path"
-  ),
-  make_option(
-    c("-r", "--reportFile"),
-    action = "store",
-    default = "QuantDataProcessingScript.html",
-    type = "character",
-    help = "HTML report file path"
-  )
-)
-args <- parse_args(OptionParser(option_list = option_list))
-
-# Check parameter values
-
-# is.na() is tested first so that an option that was not supplied is
-#   reported clearly instead of being handed to file.exists()
-if (is.na(args$inputFile) || !file.exists(args$inputFile)) {
-  stop(paste("Input file", args$inputFile, "does not exist"))
-}
-if (is.na(args$alphaFile) || !file.exists(args$alphaFile)) {
-  stop(paste("Alpha file", args$alphaFile, "does not exist"))
-}
-input_file <- args$inputFile
-alpha_file <- args$alphaFile
-first_data_column <- args$firstDataColumn
-imputation_method <- args$imputationMethod
-mean_percentile <- args$meanPercentile
-sd_percentile <- args$sdPercentile
-
-# Resolve a regular-expression option.
-#   value: the option as given on the command line
-#   Returns the expression with leading and trailing blanks, tabs and
-#   newlines removed.  When `value` names an existing file, the expression
-#   is the first 1000 characters (at most) of that file; otherwise `value`
-#   itself is the expression, which is what makes the defaults usable.
-read_regex_option <- function(value) {
-  if (file.exists(value)) {
-    value <- readChar(value, 1000)
-  }
-  gsub("^[ \t\n]+|[ \t\n]+$", "", value)
-}
-
-regex_sample_names <- read_regex_option(args$regexSampleNames)
-cat(regex_sample_names)
-cat("\n")
-
-regex_sample_grouping <- read_regex_option(args$regexSampleGrouping)
-cat(regex_sample_grouping)
-cat("\n")
-
-imputed_data_file_name <- args$imputedDataFile
-report_file_name <- args$reportFile
-
-# str() prints its own description, so it needs no cat() around it
-print("args is:")
-str(args)
-
-print("regex_sample_names is:")
-str(regex_sample_names)
-
-print("regex_sample_grouping is:")
-str(regex_sample_grouping)
-
-# from: https://github.com/molgenis/molgenis-pipelines/wiki/
-#   How-to-source-another_file.R-from-within-your-R-script
-# Function location_of_this_script returns the location of this .R script
-#   (may be needed to source other files in same dir)
-# Find the directory containing this script.
-#   Takes no arguments.
-#   Returns the directory name, or NULL when the script was reached neither
-#   through source() nor through a "--file=" command-line argument.
-location_of_this_script <- function() {
-    this_file <- NULL
-    # This file may be 'sourced'
-    # Walk the call stack by negative (relative) frame number; for a frame
-    #   whose function is base::source, take the path held in that frame's
-    #   `ofile` variable.  A later match overwrites an earlier one.
-    for (i in - (1:sys.nframe())) {
-        if (identical(sys.function(i), base::source)) {
-            this_file <- (normalizePath(sys.frame(i)$ofile))
-        }
-    }
-
-    if (!is.null(this_file)) return(dirname(this_file))
-
-    # But it may also be called from the command line
-    cmd_args <- commandArgs(trailingOnly = FALSE)
-    cmd_args_trailing <- commandArgs(trailingOnly = TRUE)
-    # keep only the leading arguments, i.e., drop the trailing ones
-    cmd_args <- cmd_args[
-      seq.int(
-        from = 1,
-        length.out = length(cmd_args) - length(cmd_args_trailing)
-        )
-      ]
-    # reduce each argument to the value of "--file=", or to "" otherwise
-    res <- gsub("^(?:--file=(.*)|.*)$", "\\1", cmd_args)
-
-    # If multiple --file arguments are given, R uses the last one
-    res <- tail(res[res != ""], 1)
-    if (0 < length(res)) return(dirname(res))
-
-    # Both are not the case. Maybe we are in an R GUI?
-    return(NULL)
-}
-
-script_dir <- location_of_this_script()
-if (is.null(script_dir)) {
-  # the location of this script is unknown, so look for the Rmd file
-  #   in the current working directory instead
-  script_dir <- "."
-}
-
-# values handed to the Rmd document as its `params`
-rmarkdown_params <- list(
-    inputFile = input_file
-  , alphaFile = alpha_file
-  , firstDataColumn = first_data_column
-  , imputationMethod = imputation_method
-  , meanPercentile = mean_percentile
-  , sdPercentile = sd_percentile
-  , regexSampleNames = regex_sample_names
-  , regexSampleGrouping = regex_sample_grouping
-  , imputedDataFilename = imputed_data_file_name
-  )
-
-str(rmarkdown_params)
-
-# NOTE
-# The report is rendered as a PDF document, which requires LaTeX.
-# An earlier form of this comment said that rendering had to be HTML
-#   until this issue is resolved:
-#   https://github.com/conda-forge/texlive-core-feedstock/issues/19
-# for reason:
-#   "The following dependencies are not available in conda"
-# reported here:
-#   https://github.com/ami-iit/bipedal-locomotion-framework/pull/457
-# LaTeX is obtained through TinyTeX instead (see below).
-
-# freeze the random number generator so the same results will be produced
-#  from run to run
-set.seed(28571)
-
-
-library(tinytex)
-# Install TinyTeX only when no LaTeX is available yet; an unconditional
-#   install_tinytex() would repeat the installation on every run.
-if (!tinytex::is_tinytex() && !nzchar(Sys.which("pdflatex"))) {
-  tinytex::install_tinytex()
-}
-rmarkdown::render(
-  input = file.path(script_dir, "mqppep_anova_script.Rmd")
-, output_format = rmarkdown::pdf_document()
-, output_file = report_file_name
-, params = rmarkdown_params
-)
--- a/mqppep_anova_script.Rmd	Tue Mar 15 12:44:04 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,801 +0,0 @@
----
-title: "MaxQuant Phospho-Proteomic Enrichment Pipeline ANOVA"
-author: "Larry Cheng; Art Eschenlauer"
-date: "May 28, 2018; Nov 16, 2021"
-output:
-  pdf_document: default
-params:
-  inputFile: "test-data/test_input_for_anova.tabular"
-  alphaFile: "test-data/alpha_levels.tabular"
-  firstDataColumn: "Intensity"
-  imputationMethod: !r c("group-median", "median", "mean", "random")[1]
-  meanPercentile: 1
-  sdPercentile: 0.2
-  regexSampleNames: "\\.(\\d+)[A-Z]$"
-  regexSampleGrouping: "(\\d+)"
-  imputedDataFilename: "Upstream_Map_pST_outputfile_STEP4_QN_LT.txt"
----
-```{r setup, include = FALSE}
-# ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285
-knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10))
-
-### FUNCTIONS
-
-# One-way ANOVA of a single row of intensities.
-#   x:               values for one row; coerced with as.numeric()
-#   grouping_factor: group assignment for each element of x
-#   Returns the first "Pr(>F)" entry of the ANOVA summary table.
-anova_func <- function(x, grouping_factor) {
-  aov_table <- summary(aov(as.numeric(x) ~ grouping_factor))[[1]]
-  aov_table[["Pr(>F)"]][1]
-}
-```
-
-## Purpose:
-Perform imputation of missing values, quantile normalization, and ANOVA.
-
-<!--
-## Variables to change for each input file
--->
-```{r include = FALSE}
-# Input Filename
-input_file <- params$inputFile
-
-# First data column - ideally, this could be detected via regexSampleNames,
-#   but for now leave it as is.
-# The parameter is either a column number or a pattern for the names of the
-#   data columns.  as.integer() gives NA (with a warning, silenced here)
-#   for text that is not a number; in that case the original text is kept
-#   so that it can be matched against the column names later.
-first_data_column <- suppressWarnings(as.integer(params$firstDataColumn))
-fdc_is_integer <- !is.na(first_data_column)
-if (!fdc_is_integer) {
-  first_data_column <- params$firstDataColumn
-}
-
-# False discovery rate adjustment for ANOVA
-#  Since pY abundance is low, set to 0.10 and 0.20 in addition to 0.05
-# (the cutoffs are the first column of the headerless alpha file)
-val_fdr <- read.table(
-  file = params$alphaFile,
-  sep = "\t",
-  header = FALSE,
-  quote = ""
-)[, 1]
-
-#Imputed Data filename
-imputed_data_filename <- params$imputedDataFilename
-```
-
-```{r echo = FALSE}
-# Imputation method, should be one of
-#   "random", "group-median", "median", or "mean"
-imputation_method <- params$imputationMethod
-
-# Selection of percentile of logvalue data to set the mean for random number
-#   generation when using random imputation
-mean_percentile <- params$meanPercentile / 100.0
-
-# deviation adjustment-factor for random values; real number.
-sd_percentile <- params$sdPercentile
-
-# Regular expression of Sample Names, e.g., "\\.(\\d+)[A-Z]$"
-regex_sample_names <- params$regexSampleNames
-
-# Regular expression to extract Sample Grouping from Sample Name;
-#   if error occurs, compare sample_factor_levels and temp_matches
-#   to see if groupings/pairs line up
-#   e.g., "(\\d+)"
-regex_sample_grouping <- params$regexSampleGrouping
-
-```
-
-```{r echo = FALSE}
-### READ DATA
-
-library(data.table)
-
-# read.table reads a file in table format and creates a data frame from it.
-#   - note that `quote = ""` means that quotation marks are treated literally.
-full_data <- read.table(
-  file = input_file,
-  sep = "\t",
-  header = T,
-  quote = "",
-  check.names = FALSE
-  )
-```
-
-### Column names from input file
-
-```{r echo = FALSE, results = 'markup'}
-print(colnames(full_data))
-# A numeric firstDataColumn is a column position, so it must not be used as
-#   a pattern (e.g., "10" would match a column named "Sequence10");
-#   otherwise it is a pattern matched against the column names.
-if (fdc_is_integer) {
-  data_column_indices <- seq.int(first_data_column, ncol(full_data))
-} else {
-  data_column_indices <-
-    grep(first_data_column, names(full_data), perl = TRUE)
-}
-# min() and max() of an empty match are infinite, which "%d" cannot format
-if (length(data_column_indices) > 0) {
-  cat(sprintf("First data column:  %d\n", min(data_column_indices)))
-  cat(sprintf("Last data column:   %d\n", max(data_column_indices)))
-} else {
-  cat("No column name matches firstDataColumn\n")
-}
-```
-
-```{r echo = FALSE, results = 'asis'}
-cat("\\newpage\n")
-```
-
-### Checking that log-transformed sample distributions are similar:
-
-```{r echo = FALSE, fig.dim = c(9, 5.5), results = 'asis'}
-
-if (FALSE == fdc_is_integer) {
-
-  if (length(data_column_indices) > 0) {
-    first_data_column <- data_column_indices[1]
-  } else {
-    stop(paste("failed to convert firstDataColumn:", first_data_column))
-  }
-}
-
-quant_data0 <- full_data[first_data_column:length(full_data)]
-quant_data <- full_data[first_data_column:length(full_data)]
-quant_data[quant_data == 0] <- NA  #replace 0 with NA
-quant_data_log <- log10(quant_data)
-
-rownames(quant_data_log) <- full_data$Phosphopeptide
-
-# data visualization
-old_par <- par(
-  mai = par("mai") + c(0.5, 0, 0, 0)
-)
-boxplot(
-  quant_data_log
-, las = 2
-)
-par(old_par)
-
-
-
-cat("\\newline\n")
-cat("\\newline\n")
-
-```
-
-```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 4), warning = FALSE}
-quant_data_log_stack <- stack(quant_data_log)
-library(ggplot2)
-ggplot(
-  quant_data_log_stack,
-  aes(x = values)) + geom_density(aes(group = ind, colour = ind))
-```
-
-### Globally, are phosphopeptide intensities approximately unimodal?
-
-<!--
-# ref for bquote below particularly and plotting math expressions generally:
-#   https://www.r-bloggers.com/2018/03/math-notation-for-r-plot-titles-expression-and-bquote/
--->
-```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5)}
-
-# identify the location of missing values
-fin <- is.finite(as.numeric(as.matrix(quant_data_log)))
-
-logvalues <- as.numeric(as.matrix(quant_data_log))[fin]
-plot(
-  density(logvalues),
-  main = bquote(
-    "Smoothed estimated probability density vs." ~ log[10](intensity)),
-  xlab = bquote(log[10](intensity))
-  )
-hist(
-  x = as.numeric(as.matrix(quant_data_log))
-, breaks = 100
-, main = bquote("Frequency vs." ~ log[10](intensity))
-, xlab = bquote(log[10](intensity))
-)
-```
-
-### Distribution of standard deviations of phosphopeptides, ignoring missing values:
-
-```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5)}
-# determine quantile
-q1 <- quantile(logvalues, probs = mean_percentile)[1]
-
-# determine standard deviation of quantile to impute
-sd_finite <- function(x) {
-  ok <- is.finite(x)
-  sd(x[ok]) * sd_percentile
-}
-# 1 = row of matrix (ie, phosphopeptide)
-sds <- apply(quant_data_log, 1, sd_finite)
-plot(
-  density(sds, na.rm = T)
-, main = "Smoothed estimated probability density vs. std. deviation"
-, sub = "(probability estimation made with Gaussian smoothing)"
-)
-
-m1 <- median(sds, na.rm = T) #sd to be used is the median sd
-
-```
-
-
-
-<!--
-The number of missing values is:
--->
-```{r echo = FALSE}
-#Determine number of cells to impute
-temp <- quant_data[is.na(quant_data)]
-
-#Determine number of values to impute
-number_to_impute <- length(temp)
-```
-
-<!--
-% of values that are missing:
--->
-```{r echo = FALSE}
-pct_missing_values <- length(temp) / (length(logvalues) + length(temp)) * 100
-```
-
-<!--
-First few rows of data before imputation:
--->
-```{r echo = FALSE, results = 'asis'}
-cat("\\newpage\n")
-```
-
-## Parse sample names
-
-Parse the names of the samples to deduce the factor level for each sample:
-
-```{r echo = FALSE}
-
-# prep for trt-median based imputation
-
-# Assuming that regex_sample_names <- "\\.(\\d+)[A-Z]$"
-#   get factors ->
-#      group runs (samples) by ignoring terminal [A-Z] in sample names
-
-m <- regexpr(regex_sample_names, names(quant_data), perl = TRUE)
-temp_matches <- regmatches(names(quant_data), m)
-print("Extracted sample names")
-print(temp_matches)
-m2 <- regexpr(regex_sample_grouping, temp_matches, perl = TRUE)
-sample_factor_levels <- as.factor(regmatches(temp_matches, m2))
-print("Factor levels")
-print(sample_factor_levels)
-
-```
-## Impute missing values
-
-```{r echo = FALSE}
-
-#Determine number of cells to impute
-cat("Before imputation,",
-  sprintf(
-    "there are:\n  %d peptides\n  %d missing values (%2.0f%s)",
-    sum(rep.int(TRUE, nrow(quant_data))),
-    sum(is.na(quant_data)),
-    pct_missing_values,
-    "%"
-    )
-)
-
-```
-```{r echo = FALSE}
-
-#Impute data
-quant_data_imp <- quant_data
-
-# Identify which values are missing and need to be imputed
-ind <- which(is.na(quant_data_imp), arr.ind = TRUE)
-
-```
-```{r echo = FALSE}
-
-# Apply imputation
-#   Every method replaces only the cells that are missing.  good_rows marks
-#   the rows that have no missing value left afterward.
-switch(
-  imputation_method
-, "group-median" = {
-    cat("Imputation method:\n   substitute missing value",
-      "with median peptide-intensity for sample-group\n")
-
-    sample_level_integers <- as.integer(sample_factor_levels)
-    for (i in seq_along(levels(sample_factor_levels))) {
-      level_cols <- which(i == sample_level_integers)
-      # median of each row over the columns of this sample-group only;
-      #   NA for a row having no value in the whole group
-      group_medians <- apply(
-        as.matrix(quant_data_imp[, level_cols, drop = FALSE])
-      , 1
-      , median
-      , na.rm = TRUE
-      )
-      # fill column by column so that observed values are never touched
-      for (j in level_cols) {
-        col_values <- quant_data_imp[[j]]
-        missing_rows <- is.na(col_values)
-        col_values[missing_rows] <- group_medians[missing_rows]
-        quant_data_imp[[j]] <- col_values
-      }
-    }
-    good_rows <- !is.na(rowMeans(quant_data_imp))
-  }
-, "median" = {
-    cat("Imputation method:\n   substitute missing value with",
-      "median peptide-intensity across all sample classes\n")
-    # `ind` holds the (row, col) positions of the missing cells
-    quant_data_imp[ind] <-
-      apply(quant_data_imp, 1, median, na.rm = TRUE)[ind[, 1]]
-    good_rows <- !is.na(rowMeans(quant_data_imp))
-  }
-, "mean" = {
-    cat("Imputation method:\n   substitute missing value with",
-      "mean peptide-intensity across all sample classes\n")
-    quant_data_imp[ind] <-
-      apply(quant_data_imp, 1, mean, na.rm = TRUE)[ind[, 1]]
-    good_rows <- !is.na(rowMeans(quant_data_imp))
-  }
-, "random" = {
-    cat(
-      "Imputation method:\n   substitute missing value with\n  ",
-      sprintf(
-        "random intensity N ~ (%0.2f, %0.2f)\n"
-      , q1, m1
-      )
-    )
-    # draws are made on the log10 scale and converted back to intensities
-    quant_data_imp[is.na(quant_data_imp)] <-
-      10 ^ rnorm(number_to_impute, mean = q1, sd = m1)
-    good_rows <- !is.na(rowMeans(quant_data_imp))
-  }
-, stop(paste("Unrecognized imputation method:", imputation_method))
-)
-
-```
-```{r echo = FALSE}
-
-# Report what imputation achieved: missing values left in the usable rows,
-#   and how many rows are and are not usable
-cat("After imputation, there are:",
-  sprintf(
-    "\n  %d missing values\n  %d peptides usable for analysis"
-  , sum(is.na(quant_data_imp[good_rows, ]))
-  , sum(good_rows)
-  ),
-  sprintf(
-    "\n  %d peptides with too many missing values for further analysis"
-  , sum(!good_rows)
-  )
-)
-```
-```{r echo = FALSE}
-
-
-# Zap rows where imputation was ineffective
-full_data         <- full_data        [good_rows, ]
-quant_data        <- quant_data       [good_rows, ]
-quant_data_imp <- quant_data_imp[good_rows, ]
-
-```
-```{r echo = FALSE}
-
-d_combined <- (density(as.numeric(as.matrix(
-  log10(quant_data_imp)
-))))
-d_original <-
-  density(as.numeric(as.matrix(
-    log10(quant_data_imp[!is.na(quant_data)]))))
-
-```
-```{r echo = FALSE}
-
-if (sum(is.na(quant_data)) > 0) {
-  # There ARE missing values
-  d_imputed <-
-    (density(as.numeric(as.matrix(
-      log10(quant_data_imp[is.na(quant_data)])
-    ))))
-} else {
-  # There are NO missing values
-  d_imputed <- d_combined
-}
-
-```
-
-```{r echo = FALSE, fig.dim = c(9, 5)}
-ylim <- c(0, max(d_combined$y, d_original$y, d_imputed$y))
-plot(
-  d_combined,
-  ylim = ylim,
-  sub = "Blue = data before imputation; Red = imputed data",
-  main = "Density vs. log10(intensity) before and after imputation"
-)
-lines(d_original, col = "blue")
-lines(d_imputed, col = "red")
-```
-
-## Perform Quantile Normalization
-
-<!--
-# Apply quantile normalization using preprocessCore::normalize.quantiles
-# ---
-# tool repository: http://bioconductor.org/packages/release/bioc/html/preprocessCore.html
-#   except this: https://support.bioconductor.org/p/122925/#9135989
-#   says to install it like this:
-#     ```
-#     BiocManager::install("preprocessCore", configure.args="--disable-threading", force = TRUE, lib=.libPaths()[1])
-#     ```
-# conda installation (necessary because of a bug in recent openblas):
-#   conda install bioconductor-preprocesscore openblas=0.3.3
-# ...
-# ---
-# normalize.quantiles {preprocessCore}	--  Quantile Normalization
-#
-# Description:
-#   Using a normalization based upon quantiles, this function normalizes a matrix of probe level intensities.
-#
-# Usage:
-#   normalize.quantiles(x, copy = TRUE, keep.names = FALSE)
-#
-# Arguments:
-#
-#   - x: A matrix of intensities where each column corresponds to a chip and each row is a probe.
-#
-#   - copy: Make a copy of matrix before normalizing. Usually safer to work with a copy,
-#       but in certain situations not making a copy of the matrix, but instead normalizing
-#       it in place will be more memory friendly.
-#
-#   - keep.names: Boolean option to preserve matrix row and column names in output.
-#
-# Details:
-#   This method is based upon the concept of a quantile-quantile plot extended to n dimensions.
-#     No special allowances are made for outliers. If you make use of quantile normalization
-#     please cite Bolstad et al, Bioinformatics (2003).
-#
-#   This function will handle missing data (ie NA values), based on
-#     the assumption that the data is missing at random.
-#
-#   Note that the current implementation optimizes for better memory usage
-#     at the cost of some additional run-time.
-#
-# Value: A normalized matrix.
-#
-# Author: Ben Bolstad, bmbolstad.com
-#
-# References
-#
-#   - Bolstad, B (2001) Probe Level Quantile Normalization of High Density Oligonucleotide
-#       Array Data. Unpublished manuscript http://bmbolstad.com/stuff/qnorm.pdf
-#
-#   - Bolstad, B. M., Irizarry R. A., Astrand, M, and Speed, T. P. (2003) A Comparison of
-#       Normalization Methods for High Density Oligonucleotide Array Data Based on Bias
-#       and Variance. Bioinformatics 19(2), pp 185-193. DOI 10.1093/bioinformatics/19.2.185
-#       http://bmbolstad.com/misc/normalize/normalize.html
-# ...
--->
-```{r echo = FALSE}
-library(preprocessCore)
-
-# quantile-normalize the imputed intensities, restoring the column names
-#   that conversion to a matrix and back does not carry over
-quant_data_imp_qn <-
-  as.data.frame(normalize.quantiles(as.matrix(quant_data_imp)))
-names(quant_data_imp_qn) <- names(quant_data_imp)
-quant_data_imp_qn_log <- log10(quant_data_imp_qn)
-
-rownames(quant_data_imp_qn_log) <- full_data[, 1]
-
-# center and scale each row of the log-transformed data
-quant_data_imp_qn_ls <- t(scale(t(log10(quant_data_imp_qn))))
-# despite its name, any_nan() is TRUE for a row that has NO NA or NaN,
-#   i.e., for a row that is to be kept
-any_nan <- function(x) {
-  !anyNA(x)
-}
-sel <- apply(quant_data_imp_qn_ls, 1, any_nan)
-quant_data_imp_qn_ls2 <- quant_data_imp_qn_ls[which(sel), ]
-quant_data_imp_qn_ls2 <- as.data.frame(quant_data_imp_qn_ls2)
-
-#output quantile normalized data
-# NOTE(review): full_data[1:9] presumes nine annotation columns ahead of
-#   the data; confirm when firstDataColumn is not 10
-data_table_imp_qn_lt <- cbind(full_data[1:9], quant_data_imp_qn_log)
-# only a trailing ".txt" is removed before "_QN_LT.txt" is appended
-write.table(
-  data_table_imp_qn_lt,
-  file = paste0(sub("\\.txt$", "", imputed_data_filename), "_QN_LT.txt"),
-  sep = "\t",
-  col.names = TRUE,
-  row.names = FALSE
-)
-
-```
-
-<!-- ACE insertion begin -->
-### Checking that normalized, imputed, log-transformed sample distributions are similar:
-
-```{r echo = FALSE, fig.dim = c(9, 5.5), results = 'asis'}
-
-
-# Save unimputed quant_data_log for plotting below
-unimputed_quant_data_log <- quant_data_log
-
-# log10 transform (after preparing for zero values,
-#   which should never happen...)
-quant_data_imp_qn[quant_data_imp_qn == 0] <- .000000001
-quant_data_log <- log10(quant_data_imp_qn)
-
-# Output quantile-normalized log-transformed dataset
-#   with imputed, normalized data
-
-data_table_imputed <- cbind(full_data[1:9], quant_data_log)
-write.table(
-    data_table_imputed
-  , file = imputed_data_filename
-  , sep = "\t"
-  , col.names = TRUE
-  , row.names = FALSE
-  , quote = FALSE
-  )
-
-
-
-# data visualization
-old_par <- par(
-  mai = par("mai") + c(0.5, 0, 0, 0)
-, oma = par("oma") + c(0.5, 0, 0, 0)
-)
-boxplot(
-  quant_data_log
-, las = 2
-)
-par(old_par)
-
-
-
-cat("\\newline\n")
-cat("\\newline\n")
-
-```
-
-```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 4)}
-quant_data_log_stack <- stack(quant_data_log)
-ggplot(
-  quant_data_log_stack,
-  aes(x = values)
-  ) + geom_density(aes(group = ind, colour = ind))
-```
-
-## Perform ANOVA filters
-
-(see following pages)
-
-```{r, echo = FALSE}
-# Make new data frame containing only Phosphopeptides
-#   to connect preANOVA to ANOVA (connect_df)
-connect_df <- data.frame(
-    data_table_imp_qn_lt$Phosphopeptide
-  , data_table_imp_qn_lt[, first_data_column]
-  )
-colnames(connect_df) <- c("Phosphopeptide", "Intensity")
-```
-
-```{r echo = FALSE, fig.dim = c(9, 10), results = 'asis'}
-# Get factors -> group replicates (as indicated by terminal letter)
-#   by the preceding digits;
-#   e.g., group .1A .1B .1C into group 1; .2A .2B .2C, into group 2; etc..
-m <-
-  regexpr(regex_sample_names, names(quant_data_imp_qn_log), perl = TRUE)
-temp_matches <- regmatches(names(quant_data_imp_qn_log), m)
-number_of_samples <- length(temp_matches)
-m2 <- regexpr(regex_sample_grouping, temp_matches, perl = TRUE)
-sample_factor_levels <- as.factor(regmatches(temp_matches, m2))
-
-# Build the sprintf() format used to label a heatmap row with its adjusted
-#   p-value: fixed notation with enough decimals to show the leading digit,
-#   or scientific notation for values of 0.0001 or less.
-p_value_label_format <- function(x) {
-  if (x > 0.0001) {
-    paste0("(%0.", 1 + ceiling(-log10(x)), "f) %s")
-  } else {
-    paste0("(%0.4e) %s")
-  }
-}
-
-if (length(levels(sample_factor_levels)) < 2) {
-  # ANOVA is not possible; show how the sample names were parsed instead
-  cat(
-    "ERROR!!!! Cannot perform ANOVA analysis",
-    "because it requires two or more factor levels\n"
-  )
-  cat("Unparsed sample names are:\n")
-  print(names(quant_data_imp_qn_log))
-  cat(sprintf("Parsing rule for SampleNames is '%s'\n", regex_sample_names))
-  cat("Parsed names are:\n")
-  print(temp_matches)
-  cat(sprintf(
-    "Parsing rule for SampleGrouping is '%s'\n",
-    regex_sample_grouping
-  ))
-  cat("Sample group assignments are:\n")
-  print(regmatches(temp_matches, m2))
-} else {
-  # one ANOVA per row, followed by FDR adjustment of the p-values
-  p_value_data_anova_ps <-
-    apply(
-      quant_data_imp_qn_log,
-      1,
-      anova_func,
-      grouping_factor = sample_factor_levels
-    )
-
-  p_value_data_anova_ps_fdr <-
-    p.adjust(p_value_data_anova_ps, method = "fdr")
-  p_value_data <- data.frame(
-    phosphopeptide = full_data[, 1],
-    raw_anova_p = p_value_data_anova_ps,
-    fdr_adjusted_anova_p = p_value_data_anova_ps_fdr
-  )
-
-  # Re-output quantile-normalized log-transformed dataset
-  #   with imputed, normalized data to include p-values
-  # NOTE(review): full_data[1:9] presumes nine annotation columns ahead of
-  #   the data; confirm when firstDataColumn is not 10
-  data_table_imputed <-
-    cbind(full_data[1:9], p_value_data[, 2:3], quant_data_log)
-  write.table(
-    data_table_imputed,
-    file = imputed_data_filename,
-    sep = "\t",
-    col.names = TRUE,
-    row.names = FALSE,
-    quote = FALSE
-  )
-
-  p_value_data <-
-    p_value_data[order(p_value_data$fdr_adjusted_anova_p), ]
-
-  for (cutoff in val_fdr) {
-    #loop through FDR cutoffs
-
-    filtered_p <-
-      p_value_data[
-        which(p_value_data$fdr_adjusted_anova_p < cutoff),
-        ,
-        drop = FALSE
-      ]
-    filtered_data_filtered <-
-      quant_data_imp_qn_log[
-        rownames(filtered_p),
-        ,
-        drop = FALSE
-      ]
-    filtered_data_filtered <-
-      filtered_data_filtered[
-        order(filtered_p$fdr_adjusted_anova_p),
-        ,
-        drop = FALSE
-      ]
-
-    # enlarge the bottom margin and shrink the top one for the boxplot;
-    #   par() returns the settings it replaced, for restoration below
-    old_oma <- par("oma")
-    old_par <- par(
-      mai = (par("mai") + c(0.7, 0, 0, 0)) * c(1, 1, 0.3, 1),
-      oma = old_oma * c(1, 1, 0.3, 1),
-      cex.main = 0.9,
-      cex.axis = 0.7
-    )
-
-    cat("\\newpage\n")
-    if (nrow(filtered_data_filtered) > 0) {
-      cat(sprintf(
-        "Intensities for peptides whose adjusted p-value < %0.2f\n",
-        cutoff
-      ))
-      cat("\\newline\n")
-      cat("\\newline\n")
-
-      boxplot(
-        filtered_data_filtered,
-        main = "Imputed, normalized intensities", # no line plot
-        las = 2,
-        ylab = expression(log[10](intensity))
-      )
-    } else {
-      cat(sprintf(
-        "No peptides were found to have cutoff adjusted p-value < %0.2f\n",
-        cutoff
-      ))
-    }
-    par(old_par)
-
-    if (nrow(filtered_data_filtered) > 0) {
-      #Add Phosphopeptide column to anova_filtered table
-      anova_filtered_merge <- merge(
-        x = connect_df,
-        y = filtered_data_filtered,
-        by.x = "Intensity",
-        by.y = 1
-      )
-      anova_filtered_merge_order <- rownames(filtered_p)
-
-      anova_filtered_merge_format <- vapply(
-        X = filtered_p$fdr_adjusted_anova_p,
-        FUN = p_value_label_format,
-        FUN.VALUE = character(1)
-      )
-
-      # columns 3 through number_of_samples + 1 of the merge result;
-      #   the same columns that `2:number_of_samples + 1` selects
-      anova_filtered <- data.table(
-        anova_filtered_merge$Phosphopeptide,
-        anova_filtered_merge$Intensity,
-        anova_filtered_merge[, seq.int(3, number_of_samples + 1)]
-      )
-      colnames(anova_filtered) <-
-        c("Phosphopeptide", colnames(filtered_data_filtered))
-
-      # merge qualitative columns into the ANOVA data
-      output_table <- data.frame(anova_filtered$Phosphopeptide)
-      output_table <- merge(
-        x = output_table,
-        y = data_table_imp_qn_lt,
-        by.x = "anova_filtered.Phosphopeptide",
-        by.y = "Phosphopeptide"
-      )
-
-      #Produce heatmap to visualize significance and the effect of imputation
-      m <-
-        as.matrix(unimputed_quant_data_log[anova_filtered_merge_order, ])
-      if (nrow(m) > 0) {
-        # prefix each row name with its adjusted p-value
-        rownames_m <- rownames(m)
-        rownames(m) <- vapply(
-          X = seq_len(nrow(m)),
-          FUN = function(i) {
-            sprintf(
-              anova_filtered_merge_format[i],
-              filtered_p$fdr_adjusted_anova_p[i],
-              rownames_m[i]
-            )
-          },
-          FUN.VALUE = character(1)
-        )
-        margins <- c(
-          max(nchar(colnames(m))) * 10 / 16, # col
-          max(nchar(rownames(m))) * 5 / 16 # row
-        )
-        how_many_peptides <- min(50, nrow(m))
-
-        cat("\\newpage\n")
-        if (nrow(m) > 50) {
-          cat("Heatmap for the 50 most-significant peptides",
-            sprintf(
-              "whose adjusted p-value < %0.2f\n",
-              cutoff)
-          )
-        } else {
-          cat("Heatmap for peptides whose",
-            sprintf("adjusted p-value < %0.2f\n",
-            cutoff)
-          )
-        }
-        cat("\\newline\n")
-        cat("\\newline\n")
-        if (nrow(m) > 1) {
-          # par() must be given the list that it returned when the setting
-          #   was changed; the bare value of par("cex.main") restores nothing
-          op <- par(cex.main = 0.6)
-          # rows are reversed (how_many_peptides:1) before plotting;
-          #   a failure to draw is reported by try() but does not stop
-          #   the report
-          try(
-            heatmap(
-              m[how_many_peptides:1, ],
-              Rowv = NA,
-              Colv = NA,
-              cexRow = 0.7,
-              cexCol = 0.8,
-              scale = "row",
-              margins = margins,
-              main =
-                "Heatmap of unimputed, unnormalized intensities",
-              xlab = ""
-            )
-          )
-          par(op)
-        }
-      }
-    }
-  }
-}
-```
-
-<!--
-## Peptide IDs, etc.
-
-See output files.
--->
--- a/repository_dependencies.xml	Tue Mar 15 12:44:04 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-<?xml version="1.0" ?>
-<repositories description="Suite for preprocessing and ANOVA of MaxQuant results using LC-MS proteomics data from phosphoproteomic enrichment.">
-    <repository name="mqppep_preproc" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="07fb0e756c69"/>
-    <repository name="mqppep_anova" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="6c22e8563a93"/>
-</repositories>
\ No newline at end of file
--- a/test-data/alpha_levels.tabular	Tue Mar 15 12:44:04 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-0.05
-0.1
-0.2
--- a/test-data/test_input_for_anova.tabular	Tue Mar 15 12:44:04 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-Phosphopeptide	Sequence10	Sequence7	Gene_Name	Phosphoresidue	UniProt_ID	Description	Function Phosphoresidue(PSP=PhosphoSitePlus.org)	Putative Upstream Kinases(PSP=PhosphoSitePlus.org)/Phosphatases/Binding Domains	Intensity.shL.1A	Intensity.shL.1B	Intensity.shL.1C	Intensity.shR.2A	Intensity.shR.2B	Intensity.shR.2C
-AAAAPDSRVpSEEENLK	MAAAAPDSRVpSEEENLKKTPK	AAPDSRVsEEENLKK	RRP15	pS11	Q9Y3B9	RRP15_HUMAN RRP15-like protein OS=Homo sapiens OX=9606 GN=RRP15 PE=1 SV=2	N/A	CK2alpha | Casein kinase II substrate | G protein-coupled receptor kinase 1 substrate | PKC kinase substrate | PKA kinase substrate | BARD1 BRCT domain binding | PKA | CK1 | CK2	38150000	39445000	56305000	55338000	7010600	70203000
-AAAITDMADLEELSRLpSPLPPGpSPGSAAR	MADLEELSRLpSPLPPGSPGSA; LSRLSPLPPGpSPGSAARGRAE	LEELSRLsPLPPGSP | LSPLPPGsPGSAARG	AEBP2; AEBP2	pS18, pS24; pS18, pS24	Q6ZN18; Q6ZN18-2	AEBP2_HUMAN Zinc finger protein AEBP2 OS=Homo sapiens OX=9606 GN=AEBP2 PE=1 SV=2; AEBP2_HUMAN Isoform 2 of Zinc finger protein AEBP2 OS=Homo sapiens OX=9606 GN=AEBP2	N/A	N/A	5416400	7101800	385280000	208060000	41426000	352400000
-ADALQAGASQFETpSAAK	LQAGASQFETpSAAKLKRKYWW	GASQFETsAAKLKRK	VAMP2; VAMP3	pS80; pS63	P63027; Q15836	VAMP2_HUMAN_Vesicle-associated membrane protein 2 OS=Homo sapiens OX=9606 GN=VAMP2 PE=1 SV=3; VAMP3_HUMAN_Vesicle-associated membrane protein 3 OS=Homo sapiens OX=9606 GN=VAMP3 PE=1 SV=3	N/A	PKD3 | PKCiota	44627000	41445000	69094000	42521000	5738000	61819000
-DQKLpSELDDR	DKVLERDQKLpSELDDRADALQ	LERDQKLsELDDRAD	VAMP1; VAMP1; VAMP1; VAMP2; VAMP3	pS63; pS63; pS63; pS61; pS44	P23763; P23763-2; P23763-3; P63027; Q15836	VAMP1_HUMAN_Vesicle-associated membrane protein 1 OS=Homo sapiens OX=9606 GN=VAMP1 PE=1 SV=1; VAMP1_HUMAN_Isoform 3 of Vesicle-associated membrane protein 1 OS=Homo sapiens OX=9606 GN=VAMP1; VAMP1_HUMAN_Isoform 2 of Vesicle-associated membrane protein 1 OS=Homo sapiens OX=9606 GN=VAMP1; VAMP2_HUMAN_Vesicle-associated membrane protein 2 OS=Homo sapiens OX=9606 GN=VAMP2 PE=1 SV=3; VAMP3_HUMAN_Vesicle-associated membrane protein 3 OS=Homo sapiens OX=9606 GN=VAMP3 PE=1 SV=3	N/A	CK2alpha | PKAbeta | PKAgamma | PKCiota | Casein kinase II substrate | G protein-coupled receptor kinase 1 substrate | PKC kinase substrate | PKA kinase substrate | Pyruvate dehydrogenase kinase substrate	75542000	44814000	32924000	35016000	11023000	4669900
-EFVpSSDESSSGENK	SESFKSKEFVpSSDESSSGENK	FKSKEFVsSDESSSG	SSRP1	pS667	Q08945	SSRP1_HUMAN FACT complex subunit SSRP1 OS=Homo sapiens OX=9606 GN=SSRP1 PE=1 SV=1	N/A	CK2alpha | CK2a2 | CDK7 | Casein kinase II substrate | G protein-coupled receptor kinase 1 substrate | Casein Kinase I substrate | CK2 | GSK3	12562000	16302000	23000000	7857800	0	18830000
-EGMNPSYDEYADpSDEDQHDAYLER	MNPSYDEYADpSDEDQHDAYLE	SYDEYADsDEDQHDA	SSRP1	pS444	Q08945	SSRP1_HUMAN FACT complex subunit SSRP1 OS=Homo sapiens OX=9606 GN=SSRP1 PE=1 SV=1	N/A	CK2alpha | CK2a2 | CDK7 | CK1alpha | Casein kinase II substrate | b-Adrenergic Receptor kinase substrate | Pyruvate dehydrogenase kinase substrate	0	0	0	0	0	0
-IGNEEpSDLEEACILPHpSPINVDK	DDEEKIGNEEpSDLEEACILPH; DLEEACILPHpSPINVDKRPIA	EKIGNEEsDLEEACI | EACILPHsPINVDKR	HERC2	pS1577, pS1588	O95714	HERC2_HUMAN E3 ubiquitin-protein ligase HERC2 OS=Homo sapiens OX=9606 GN=HERC2 PE=1 SV=2	N/A	CK2alpha | Casein kinase II substrate | ERK1, ERK2 Kinase substrate | GSK-3, ERK1, ERK2, CDK5 substrate | b-Adrenergic Receptor kinase substrate | WW domain binding | ERK/MAPK | CK2 | NEK6	167764000	121218000	155736000	140640000	83642000	128468000
-IRAEEEDLAAVPFLApSDNEEEEDEK	EDLAAVPFLApSDNEEEEDEKG	AAVPFLAsDNEEEED	HERC2	pS2928	O95714	HERC2_HUMAN E3 ubiquitin-protein ligase HERC2 OS=Homo sapiens OX=9606 GN=HERC2 PE=1 SV=2	N/A	CK2alpha | Casein kinase II substrate | CK2	22562000	18225000	9119700	11689000	0	0
-KGLLApTpSGNDGTIR	VWCNKKGLLApTSGNDGTIRVW; WCNKKGLLATpSGNDGTIRVWN	NKKGLLAtSGNDGTI | KKGLLATsGNDGTIR	HERC1	pT3445, pS3446	Q15751	HERC1_HUMAN Probable E3 ubiquitin-protein ligase HERC1 OS=Homo sapiens OX=9606 GN=HERC1 PE=1 SV=2	N/A	N/A	7843600	0	241700000	0	0	10042600
-KpSSLVTSK	PTPQDLPQRKpSSLVTSKLAGG; PTPQDLPQRKpSSLVTSKLAG	QDLPQRKsSLVTSKL	ENSA; ENSA; ENSA; ENSA; ENSA; ENSA; ENSA; ENSA	pS108; pS108; pS124; pS131; pS104; pS104; pS120; pS124	O43768; O43768-2; O43768-3; O43768-4; O43768-5; O43768-6; O43768-7; O43768-9	ENSA_HUMAN Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA PE=1 SV=1; ENSA_HUMAN Isoform 2 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 3 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 4 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 5 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 6 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 7 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 9 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA	N/A	G protein-coupled receptor kinase 1 substrate	0	0	18629000	0	0	0
-KSpSLVTSK	TPQDLPQRKSpSLVTSKLAGGQ; TPQDLPQRKSpSLVTSKLAG	DLPQRKSsLVTSKLA	ENSA; ENSA; ENSA; ENSA; ENSA; ENSA; ENSA; ENSA	pS109; pS109; pS125; pS132; pS105; pS105; pS121; pS125	O43768; O43768-2; O43768-3; O43768-4; O43768-5; O43768-6; O43768-7; O43768-9	ENSA_HUMAN Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA PE=1 SV=1; ENSA_HUMAN Isoform 2 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 3 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 4 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 5 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 6 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 7 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 9 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA	molecular association, regulation; protein conformation; SNCA(DISRUPTS)	G protein-coupled receptor kinase 1 substrate | PKC kinase substrate | PKA kinase substrate | Casein Kinase I substrate | MDC1 BRCT domain binding | GSK3 | AURORA	7090300	8341200	9691500	10030000	1675200	9952100
-LpSPNPWQEK	MLAVDIEDRLpSPNPWQEKREI	VDIEDRLsPNPWQEK	HERC2	pS3462	O95714	HERC2_HUMAN E3 ubiquitin-protein ligase HERC2 OS=Homo sapiens OX=9606 GN=HERC2 PE=1 SV=2	N/A	ERK1, ERK2 Kinase substrate | GSK-3, ERK1, ERK2, CDK5 substrate | WW domain binding	0	11706000	12495000	0	7273000	8877800
-NLLEDDpSDEEEDFFLR	SERRNLLEDDpSDEEEDFFLRG	RNLLEDDsDEEEDFF	VAMP4	pS30	O75379	VAMP4_HUMAN_Vesicle-associated membrane protein 4 OS=Homo sapiens OX=9606 GN=VAMP4 PE=1 SV=2	N/A	CK2alpha | Casein kinase II substrate | Casein Kinase I substrate | b-Adrenergic Receptor kinase substrate | BARD1 BRCT domain binding | CK2 | Csnk2a1	1592100000	973800000	1011600000	1450300000	631970000	878760000
-pSQKQEEENPAEETGEEK	MpSQKQEEENPAE	______MsQKQEEEN	ENSA; ENSA; ENSA; ENSA; ENSA; ENSA	pS2; pS2; pS2; pS2; pS2; pS2	O43768; O43768-2; O43768-3; O43768-4; O43768-8; O43768-9	ENSA_HUMAN Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA PE=1 SV=1; ENSA_HUMAN Isoform 2 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 3 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 4 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 8 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 9 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA	N/A	ATM kinase substrate | PKC kinase substrate | PKA kinase substrate	0	0	8765300	0	2355900	14706000
-QLSEpSFK	SKSSSRQLSEpSFKSKEFVSSD	SSRQLSEsFKSKEFV	SSRP1	pS659	Q08945	SSRP1_HUMAN FACT complex subunit SSRP1 OS=Homo sapiens OX=9606 GN=SSRP1 PE=1 SV=1	N/A	CK2a2 | CDK7 | PKCalpha | PKCbeta | DNAPK | PKC kinase substrate | PKA kinase substrate | NEK6	68201000	87774000	138300000	95357000	19966000	149110000
-RGpSLEMSSDGEPLSR	SSATSGGRRGpSLEMSSDGEPL	TSGGRRGsLEMSSDG	AEBP2; AEBP2	pS206; pS206	Q6ZN18; Q6ZN18-2	AEBP2_HUMAN Zinc finger protein AEBP2 OS=Homo sapiens OX=9606 GN=AEBP2 PE=1 SV=2; AEBP2_HUMAN Isoform 2 of Zinc finger protein AEBP2 OS=Homo sapiens OX=9606 GN=AEBP2	N/A	Casein Kinase II substrate | G protein-coupled receptor kinase 1 substrate | PKC kinase substrate | PKA kinase substrate | PKA | GSK3 | AURORA	19262000	11103000	19454000	0	1816900	22028000
-SDGpSLEDGDDVHR	IEDGGARSDGpSLEDGDDVHRA	GGARSDGsLEDGDDV	SERINC1	pS364	Q9NRX5	SERC1_HUMAN Serine incorporator 1 OS=Homo sapiens OX=9606 GN=SERINC1 PE=1 SV=1	N/A	Casein kinase II substrate | Plk1 kinase substrate | Pyruvate dehydrogenase kinase substrate | CK1 | PLK | PLK1	31407000	17665000	20892000	23194000	5132400	54893000
-SEpSLTAESR	EGGGLMTRSEpSLTAESRLVHT	GLMTRSEsLTAESRL	HERC1	pS1491	Q15751	HERC1_HUMAN Probable E3 ubiquitin-protein ligase HERC1 OS=Homo sapiens OX=9606 GN=HERC1 PE=1 SV=2	N/A	b-Adrenergic Receptor kinase substrate	11766000	13176000	20540000	16963000	4364700	21308000
-STGPTAATGpSNRR	MSTGPTAATGpSNRRLQQTQNQ	GPTAATGsNRRLQQT	VAMP3	pS11	Q15836	VAMP3_HUMAN_Vesicle-associated membrane protein 3 OS=Homo sapiens OX=9606 GN=VAMP3 PE=1 SV=3	N/A	PKCalpha | PKCbeta | PKCzeta | PKC kinase substrate | PKA kinase substrate	3057100	4718800	12052000	5047700	1070900	8333500
-TEDLEATpSEHFK	RNKTEDLEATpSEHFKTTSQKV	TEDLEATsEHFKTTS	VAMP8	pS55	Q9BV40	VAMP8_HUMAN_Vesicle-associated membrane protein 8 OS=Homo sapiens OX=9606 GN=VAMP8 PE=1 SV=1	activity, inhibited; abolish function in SNARE complex during mast cell secretion, reduces in vitro ensemble vesicle fusion	G protein-coupled receptor kinase 1 substrate | Casein Kinase I substrate	20400000	9738500	7862300	0	0	76518000
-TFWpSPELK	SSMNSIKTFWpSPELKKERVLR	NSIKTFWsPELKKER	ERC2	pS187	O15083	ERC2_HUMAN ERC protein 2 OS=Homo sapiens OX=9606 GN=ERC2 PE=1 SV=3	N/A	IKKalpha | IKKbeta | HIPK2 | Casein Kinase II substrate | ERK1, ERK2 Kinase substrate | GSK-3, ERK1, ERK2, CDK5 substrate | WW domain binding	29764000	20957000	24855000	30752000	8304800	23771000
-YFDpSGDYNMAK	CADEMQKYFDpSGDYNMAKAKM; RLQKGQKYFDpSGDYNMAKAKM; MKSVEQKYFDpSGDYNMAKAKM	EMQKYFDsGDYNMAK | KGQKYFDsGDYNMAK | VEQKYFDsGDYNMAK	ENSA; ENSA; ENSA; ENSA; ENSA; ENSA; ENSA; ENSA	pS67; pS67; pS83; pS90; pS63; pS63; pS79; pS83	O43768; O43768-2; O43768-3; O43768-4; O43768-5; O43768-6; O43768-7; O43768-9	ENSA_HUMAN Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA PE=1 SV=1; ENSA_HUMAN Isoform 2 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 3 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 4 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 5 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 6 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 7 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 9 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA	molecular association, regulation; cell cycle regulation; PPP2CA(INDUCES)	b-Adrenergic Receptor kinase substrate	323250000	127970000	0	67123000	12790000	71378000
--- a/workflow/ppenrich_suite_wf.ga	Tue Mar 15 12:44:04 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,678 +0,0 @@
-{
-    "a_galaxy_workflow": "true",
-    "annotation": "phosphoproteomic enrichment data pre-processing and ANOVA",
-    "creator": [
-        {
-            "class": "Person",
-            "identifier": "0000-0002-2882-0508",
-            "name": "Art Eschenlauer"
-        }
-    ],
-    "format-version": "0.1",
-    "license": "MIT",
-    "name": "ppenrich_suite_wf",
-    "steps": {
-        "0": {
-            "annotation": "The Phospho (STY)Sites.txt file produced by MaxQuant (found in the txt folder).",
-            "content_id": null,
-            "errors": null,
-            "id": 0,
-            "input_connections": {},
-            "inputs": [
-                {
-                    "description": "The Phospho (STY)Sites.txt file produced by MaxQuant (found in the txt folder).",
-                    "name": "Phospho (STY)Sites.txt"
-                }
-            ],
-            "label": "Phospho (STY)Sites.txt",
-            "name": "Input dataset",
-            "outputs": [],
-            "position": {
-                "bottom": -36.30000305175781,
-                "height": 82.19999694824219,
-                "left": 150,
-                "right": 350,
-                "top": -118.5,
-                "width": 200,
-                "x": 150,
-                "y": -118.5
-            },
-            "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
-            "tool_version": null,
-            "type": "data_input",
-            "uuid": "f4273d40-f2b8-4ad0-8bcc-91e72bd25fe1",
-            "workflow_outputs": []
-        },
-        "1": {
-            "annotation": "FASTA file of all human canonical isoforms, derived from Swiss-Prot (e.g., merge of https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot_varsplic.fasta.gz and https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz)",
-            "content_id": null,
-            "errors": null,
-            "id": 1,
-            "input_connections": {},
-            "inputs": [
-                {
-                    "description": "FASTA file of all human canonical isoforms, derived from Swiss-Prot (e.g., merge of https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot_varsplic.fasta.gz and https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz)",
-                    "name": "SwissProt_Human_Canonical_Isoform.fasta"
-                }
-            ],
-            "label": "SwissProt_Human_Canonical_Isoform.fasta",
-            "name": "Input dataset",
-            "outputs": [],
-            "position": {
-                "bottom": 278.1000061035156,
-                "height": 102.60000610351562,
-                "left": 376,
-                "right": 576,
-                "top": 175.5,
-                "width": 200,
-                "x": 376,
-                "y": 175.5
-            },
-            "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"fasta\"]}",
-            "tool_version": null,
-            "type": "data_input",
-            "uuid": "cb31b0ac-cacc-42ee-bd42-f42d0bdae128",
-            "workflow_outputs": []
-        },
-        "2": {
-            "annotation": "Derived from https://networkin.info/download/networkin_human_predictions_3.1.tsv.xz (which is free for non-commercial use - for required citation, see https://networkin.info/)",
-            "content_id": null,
-            "errors": null,
-            "id": 2,
-            "input_connections": {},
-            "inputs": [
-                {
-                    "description": "Derived from https://networkin.info/download/networkin_human_predictions_3.1.tsv.xz (which is free for non-commercial use - for required citation, see https://networkin.info/)",
-                    "name": "NetworKIN_cutoffscore2.0.tabular"
-                }
-            ],
-            "label": "NetworKIN_cutoffscore2.0.tabular",
-            "name": "Input dataset",
-            "outputs": [],
-            "position": {
-                "bottom": 423.1000061035156,
-                "height": 102.60000610351562,
-                "left": 387,
-                "right": 587,
-                "top": 320.5,
-                "width": 200,
-                "x": 387,
-                "y": 320.5
-            },
-            "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
-            "tool_version": null,
-            "type": "data_input",
-            "uuid": "e6ec01b8-ff1a-4c90-a064-b40c5cad75bb",
-            "workflow_outputs": []
-        },
-        "3": {
-            "annotation": "Derived from http://hprd.org/serine_motifs, http://hprd.org/tyrosine_motifs, and http://pegasus.biochem.mpg.de/phosida/help/motifs.aspx",
-            "content_id": null,
-            "errors": null,
-            "id": 3,
-            "input_connections": {},
-            "inputs": [
-                {
-                    "description": "Derived from http://hprd.org/serine_motifs, http://hprd.org/tyrosine_motifs, and http://pegasus.biochem.mpg.de/phosida/help/motifs.aspx",
-                    "name": "pSTY_Motifs.tabular"
-                }
-            ],
-            "label": "pSTY_Motifs.tabular",
-            "name": "Input dataset",
-            "outputs": [],
-            "position": {
-                "bottom": 546.6999969482422,
-                "height": 82.19999694824219,
-                "left": 399,
-                "right": 599,
-                "top": 464.5,
-                "width": 200,
-                "x": 399,
-                "y": 464.5
-            },
-            "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
-            "tool_version": null,
-            "type": "data_input",
-            "uuid": "2c59056a-c1b4-4a20-a194-991d56c8b6c2",
-            "workflow_outputs": []
-        },
-        "4": {
-            "annotation": "Derived from Kinase_Substrate_Dataset.gz found at https://www.phosphosite.org/staticDownloads (free for non-commercial use  - see that link for citation.)",
-            "content_id": null,
-            "errors": null,
-            "id": 4,
-            "input_connections": {},
-            "inputs": [
-                {
-                    "description": "Derived from Kinase_Substrate_Dataset.gz found at https://www.phosphosite.org/staticDownloads (free for non-commercial use  - see that link for citation.)",
-                    "name": "PSP_Kinase_Substrate_Dataset.tabular"
-                }
-            ],
-            "label": "PSP_Kinase_Substrate_Dataset.tabular",
-            "name": "Input dataset",
-            "outputs": [],
-            "position": {
-                "bottom": 696.1000061035156,
-                "height": 102.60000610351562,
-                "left": 420,
-                "right": 620,
-                "top": 593.5,
-                "width": 200,
-                "x": 420,
-                "y": 593.5
-            },
-            "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
-            "tool_version": null,
-            "type": "data_input",
-            "uuid": "987a5891-15f1-4f70-89a8-386447f0bf24",
-            "workflow_outputs": []
-        },
-        "5": {
-            "annotation": "Derived from Regulatory_sites.gz found at https://www.phosphosite.org/staticDownloads (free for non-commercial use  - see that link for citation.)",
-            "content_id": null,
-            "errors": null,
-            "id": 5,
-            "input_connections": {},
-            "inputs": [
-                {
-                    "description": "Derived from Regulatory_sites.gz found at https://www.phosphosite.org/staticDownloads (free for non-commercial use  - see that link for citation.)",
-                    "name": "PSP_Regulatory_sites.tabular"
-                }
-            ],
-            "label": "PSP_Regulatory_sites.tabular",
-            "name": "Input dataset",
-            "outputs": [],
-            "position": {
-                "bottom": 820.6999969482422,
-                "height": 82.19999694824219,
-                "left": 436,
-                "right": 636,
-                "top": 738.5,
-                "width": 200,
-                "x": 436,
-                "y": 738.5
-            },
-            "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
-            "tool_version": null,
-            "type": "data_input",
-            "uuid": "964d8d21-b063-411a-aee8-372a0d0dfba3",
-            "workflow_outputs": []
-        },
-        "6": {
-            "annotation": "List of alpha cutoff values for significance testing; text file having no header and a single line for each cutoff value.",
-            "content_id": null,
-            "errors": null,
-            "id": 6,
-            "input_connections": {},
-            "inputs": [
-                {
-                    "description": "List of alpha cutoff values for significance testing; text file having no header and a single line for each cutoff value.",
-                    "name": "alpha_levels.tabular"
-                }
-            ],
-            "label": "alpha_levels.tabular",
-            "name": "Input dataset",
-            "outputs": [],
-            "position": {
-                "bottom": 1071.1999969482422,
-                "height": 82.19999694824219,
-                "left": 418,
-                "right": 618,
-                "top": 989,
-                "width": 200,
-                "x": 418,
-                "y": 989
-            },
-            "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
-            "tool_version": null,
-            "type": "data_input",
-            "uuid": "42577db7-d5e5-4f39-b3ad-d0648abb9df3",
-            "workflow_outputs": []
-        },
-        "7": {
-            "annotation": "",
-            "content_id": "mqppep_preproc",
-            "errors": null,
-            "id": 7,
-            "input_connections": {
-                "networkin": {
-                    "id": 2,
-                    "output_name": "output"
-                },
-                "p_sty_motifs": {
-                    "id": 3,
-                    "output_name": "output"
-                },
-                "phosphoSites": {
-                    "id": 0,
-                    "output_name": "output"
-                },
-                "protein_fasta": {
-                    "id": 1,
-                    "output_name": "output"
-                },
-                "psp_kinase_substrate": {
-                    "id": 4,
-                    "output_name": "output"
-                },
-                "psp_regulatory_sites": {
-                    "id": 5,
-                    "output_name": "output"
-                }
-            },
-            "inputs": [
-                {
-                    "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing",
-                    "name": "networkin"
-                },
-                {
-                    "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing",
-                    "name": "p_sty_motifs"
-                },
-                {
-                    "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing",
-                    "name": "phosphoSites"
-                },
-                {
-                    "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing",
-                    "name": "protein_fasta"
-                },
-                {
-                    "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing",
-                    "name": "psp_kinase_substrate"
-                },
-                {
-                    "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing",
-                    "name": "psp_regulatory_sites"
-                }
-            ],
-            "label": null,
-            "name": "MaxQuant Phosphopeptide Preprocessing",
-            "outputs": [
-                {
-                    "name": "phosphoPepIntensities",
-                    "type": "tabular"
-                },
-                {
-                    "name": "enrichGraph",
-                    "type": "pdf"
-                },
-                {
-                    "name": "locProbCutoffGraph",
-                    "type": "pdf"
-                },
-                {
-                    "name": "enrichGraph_svg",
-                    "type": "svg"
-                },
-                {
-                    "name": "locProbCutoffGraph_svg",
-                    "type": "svg"
-                },
-                {
-                    "name": "filteredData_tabular",
-                    "type": "tabular"
-                },
-                {
-                    "name": "quantData_tabular",
-                    "type": "tabular"
-                },
-                {
-                    "name": "mapped_phophopeptides",
-                    "type": "tabular"
-                },
-                {
-                    "name": "melted_phophopeptide_map",
-                    "type": "tabular"
-                },
-                {
-                    "name": "mqppep_output_sqlite",
-                    "type": "sqlite"
-                },
-                {
-                    "name": "preproc_tab",
-                    "type": "tabular"
-                },
-                {
-                    "name": "preproc_csv",
-                    "type": "csv"
-                },
-                {
-                    "name": "preproc_sqlite",
-                    "type": "sqlite"
-                }
-            ],
-            "position": {
-                "bottom": 964.0999755859375,
-                "height": 793.5999755859375,
-                "left": 826.5,
-                "right": 1026.5,
-                "top": 170.5,
-                "width": 200,
-                "x": 826.5,
-                "y": 170.5
-            },
-            "post_job_actions": {
-                "RenameDatasetActionenrichGraph": {
-                    "action_arguments": {
-                        "newname": "#{phosphoSites}.enrichGraph_pdf"
-                    },
-                    "action_type": "RenameDatasetAction",
-                    "output_name": "enrichGraph"
-                },
-                "RenameDatasetActionenrichGraph_svg": {
-                    "action_arguments": {
-                        "newname": "#{phosphoSites}.enrichGraph_svg"
-                    },
-                    "action_type": "RenameDatasetAction",
-                    "output_name": "enrichGraph_svg"
-                },
-                "RenameDatasetActionfilteredData_tabular": {
-                    "action_arguments": {
-                        "newname": "#{phosphoSites}.filteredData"
-                    },
-                    "action_type": "RenameDatasetAction",
-                    "output_name": "filteredData_tabular"
-                },
-                "RenameDatasetActionlocProbCutoffGraph": {
-                    "action_arguments": {
-                        "newname": "#{phosphoSites}.locProbCutoffGraph_pdf"
-                    },
-                    "action_type": "RenameDatasetAction",
-                    "output_name": "locProbCutoffGraph"
-                },
-                "RenameDatasetActionlocProbCutoffGraph_svg": {
-                    "action_arguments": {
-                        "newname": "#{phosphoSites}.locProbCutoffGraph_svg"
-                    },
-                    "action_type": "RenameDatasetAction",
-                    "output_name": "locProbCutoffGraph_svg"
-                },
-                "RenameDatasetActionmapped_phophopeptides": {
-                    "action_arguments": {
-                        "newname": "#{phosphoSites}.ppep_map"
-                    },
-                    "action_type": "RenameDatasetAction",
-                    "output_name": "mapped_phophopeptides"
-                },
-                "RenameDatasetActionmelted_phophopeptide_map": {
-                    "action_arguments": {
-                        "newname": "#{phosphoSites}.melted"
-                    },
-                    "action_type": "RenameDatasetAction",
-                    "output_name": "melted_phophopeptide_map"
-                },
-                "RenameDatasetActionmqppep_output_sqlite": {
-                    "action_arguments": {
-                        "newname": "#{phosphoSites}.ppep_mapping_sqlite"
-                    },
-                    "action_type": "RenameDatasetAction",
-                    "output_name": "mqppep_output_sqlite"
-                },
-                "RenameDatasetActionphosphoPepIntensities": {
-                    "action_arguments": {
-                        "newname": "#{phosphoSites}.ppep_intensities"
-                    },
-                    "action_type": "RenameDatasetAction",
-                    "output_name": "phosphoPepIntensities"
-                },
-                "RenameDatasetActionpreproc_csv": {
-                    "action_arguments": {
-                        "newname": "#{phosphoSites}.preproc_csv"
-                    },
-                    "action_type": "RenameDatasetAction",
-                    "output_name": "preproc_csv"
-                },
-                "RenameDatasetActionpreproc_sqlite": {
-                    "action_arguments": {
-                        "newname": "#{phosphoSites}.preproc_sqlite"
-                    },
-                    "action_type": "RenameDatasetAction",
-                    "output_name": "preproc_sqlite"
-                },
-                "RenameDatasetActionpreproc_tab": {
-                    "action_arguments": {
-                        "newname": "#{phosphoSites}.preproc_tab"
-                    },
-                    "action_type": "RenameDatasetAction",
-                    "output_name": "preproc_tab"
-                },
-                "RenameDatasetActionquantData_tabular": {
-                    "action_arguments": {
-                        "newname": "#{phosphoSites}.quantData"
-                    },
-                    "action_type": "RenameDatasetAction",
-                    "output_name": "quantData_tabular"
-                }
-            },
-            "tool_id": "mqppep_preproc",
-            "tool_state": "{\"collapseFunc\": \"sum\", \"intervalCol\": \"1\", \"localProbCutoff\": \"0.75\", \"merge_function\": \"sum\", \"networkin\": {\"__class__\": \"RuntimeValue\"}, \"p_sty_motifs\": {\"__class__\": \"RuntimeValue\"}, \"phosphoCol\": \"^Number of Phospho [(]STY[)]$\", \"phosphoSites\": {\"__class__\": \"RuntimeValue\"}, \"protein_fasta\": {\"__class__\": \"RuntimeValue\"}, \"psp_kinase_substrate\": {\"__class__\": \"RuntimeValue\"}, \"psp_regulatory_sites\": {\"__class__\": \"RuntimeValue\"}, \"pst_not_py\": \"true\", \"species\": \"human\", \"startCol\": \"^Intensity[^_]\", \"__page__\": null, \"__rerun_remap_job_id__\": null}",
-            "tool_version": null,
-            "type": "tool",
-            "uuid": "886043ce-8d9b-474e-b970-4fe9ee6a74fa",
-            "workflow_outputs": [
-                {
-                    "label": "ppep_intensities",
-                    "output_name": "phosphoPepIntensities",
-                    "uuid": "e19a64d1-edee-4119-a72e-456af7a6c056"
-                },
-                {
-                    "label": "enrichGraph_pdf",
-                    "output_name": "enrichGraph",
-                    "uuid": "7e9936d9-9617-4df4-9133-7a04f8d05d26"
-                },
-                {
-                    "label": "locProbCutoffGraph_pdf",
-                    "output_name": "locProbCutoffGraph",
-                    "uuid": "5656cba7-25e2-4362-ae92-1ddac67dee07"
-                },
-                {
-                    "label": "enrichGraph_svg",
-                    "output_name": "enrichGraph_svg",
-                    "uuid": "ca13a22e-a41b-481c-ab87-1f97bbf768e9"
-                },
-                {
-                    "label": "locProbCutoffGraph_svg",
-                    "output_name": "locProbCutoffGraph_svg",
-                    "uuid": "fc7a11f5-30d8-4409-878a-d3b70366711c"
-                },
-                {
-                    "label": "filteredData",
-                    "output_name": "filteredData_tabular",
-                    "uuid": "aab49fc5-a3cf-4479-ac23-8e9272dadf28"
-                },
-                {
-                    "label": "quantData",
-                    "output_name": "quantData_tabular",
-                    "uuid": "23940202-403e-4256-916b-92539db07cdb"
-                },
-                {
-                    "label": "ppep_map",
-                    "output_name": "mapped_phophopeptides",
-                    "uuid": "08ad13d4-c103-4f18-92cc-2c3b58565981"
-                },
-                {
-                    "label": "melted_phosphopeptide_map",
-                    "output_name": "melted_phophopeptide_map",
-                    "uuid": "77cecaeb-8f7c-482e-b78a-e4809b194eb7"
-                },
-                {
-                    "label": "ppep_mapping_sqlite",
-                    "output_name": "mqppep_output_sqlite",
-                    "uuid": "8e53e05a-a47c-4b97-87e4-ebab133ccaea"
-                },
-                {
-                    "label": "preproc_tab",
-                    "output_name": "preproc_tab",
-                    "uuid": "530a8140-9eba-4c87-a76b-4922febc12e7"
-                },
-                {
-                    "label": "preproc_csv",
-                    "output_name": "preproc_csv",
-                    "uuid": "c5f22f05-0bf7-48cf-adc0-c2beffe33169"
-                },
-                {
-                    "label": "preproc_sqlite",
-                    "output_name": "preproc_sqlite",
-                    "uuid": "53424150-7673-40af-ad60-0b4035e0c302"
-                }
-            ]
-        },
-        "8": {
-            "annotation": "Perform ANOVA. For imputing missing values, use median of non-missing values from the same treatment group.",
-            "content_id": "mqppep_anova",
-            "errors": null,
-            "id": 8,
-            "input_connections": {
-                "alpha_file": {
-                    "id": 6,
-                    "output_name": "output"
-                },
-                "input_file": {
-                    "id": 7,
-                    "output_name": "preproc_tab"
-                }
-            },
-            "inputs": [],
-            "label": "MaxQuant Phosphopeptide ANOVA group-median imputed",
-            "name": "MaxQuant Phosphopeptide ANOVA",
-            "outputs": [
-                {
-                    "name": "imputed_data_file",
-                    "type": "tabular"
-                },
-                {
-                    "name": "report_file",
-                    "type": "html"
-                }
-            ],
-            "position": {
-                "bottom": 1349,
-                "height": 256,
-                "left": 1058,
-                "right": 1258,
-                "top": 1093,
-                "width": 200,
-                "x": 1058,
-                "y": 1093
-            },
-            "post_job_actions": {
-                "RenameDatasetActionimputed_data_file": {
-                    "action_arguments": {
-                        "newname": "#{input_file}.intensities_group-mean-imputed_QN_LT"
-                    },
-                    "action_type": "RenameDatasetAction",
-                    "output_name": "imputed_data_file"
-                },
-                "RenameDatasetActionreport_file": {
-                    "action_arguments": {
-                        "newname": "#{input_file}.intensities_group-mean-imputed_report (download/unzip to view)"
-                    },
-                    "action_type": "RenameDatasetAction",
-                    "output_name": "report_file"
-                }
-            },
-            "tool_id": "mqppep_anova",
-            "tool_state": "{\"alpha_file\": {\"__class__\": \"ConnectedValue\"}, \"first_data_column\": \"Intensity\", \"imputation\": {\"imputation_method\": \"group-median\", \"__current_case__\": 0}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"sample_grouping_regex\": \"(\\\\d+)\", \"sample_names_regex\": \"\\\\.(\\\\d+)[A-Z]$\", \"__page__\": null, \"__rerun_remap_job_id__\": null}",
-            "tool_version": null,
-            "type": "tool",
-            "uuid": "a3cb902d-8ef6-4f84-bed3-80b2b20d1916",
-            "workflow_outputs": [
-                {
-                    "label": "intensities_group-mean-imputed_QN_LT",
-                    "output_name": "imputed_data_file",
-                    "uuid": "ef19dcd3-8f3e-4fc4-829e-dae6719ff1cc"
-                },
-                {
-                    "label": "intensities_group-mean-imputed_report",
-                    "output_name": "report_file",
-                    "uuid": "26bb93b0-bc11-4455-a280-241253b21981"
-                }
-            ]
-        },
-        "9": {
-            "annotation": "Perform ANOVA. For imputing missing values, create random values.",
-            "content_id": "mqppep_anova",
-            "errors": null,
-            "id": 9,
-            "input_connections": {
-                "alpha_file": {
-                    "id": 6,
-                    "output_name": "output"
-                },
-                "input_file": {
-                    "id": 7,
-                    "output_name": "preproc_tab"
-                }
-            },
-            "inputs": [],
-            "label": "MaxQuant Phosphopeptide ANOVA randomly imputed",
-            "name": "MaxQuant Phosphopeptide ANOVA",
-            "outputs": [
-                {
-                    "name": "imputed_data_file",
-                    "type": "tabular"
-                },
-                {
-                    "name": "report_file",
-                    "type": "html"
-                }
-            ],
-            "position": {
-                "bottom": 1186,
-                "height": 256,
-                "left": 1308,
-                "right": 1508,
-                "top": 930,
-                "width": 200,
-                "x": 1308,
-                "y": 930
-            },
-            "post_job_actions": {
-                "RenameDatasetActionimputed_data_file": {
-                    "action_arguments": {
-                        "newname": "#{input_file}.intensities_randomly-imputed_QN_LT"
-                    },
-                    "action_type": "RenameDatasetAction",
-                    "output_name": "imputed_data_file"
-                },
-                "RenameDatasetActionreport_file": {
-                    "action_arguments": {
-                        "newname": "#{input_file}.intensities_randomly-imputed_report (download/unzip to view)"
-                    },
-                    "action_type": "RenameDatasetAction",
-                    "output_name": "report_file"
-                }
-            },
-            "tool_id": "mqppep_anova",
-            "tool_state": "{\"alpha_file\": {\"__class__\": \"ConnectedValue\"}, \"first_data_column\": \"Intensity\", \"imputation\": {\"imputation_method\": \"random\", \"__current_case__\": 3, \"meanPercentile\": \"1\", \"sdPercentile\": \"0.2\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"sample_grouping_regex\": \"(\\\\d+)\", \"sample_names_regex\": \"\\\\.(\\\\d+)[A-Z]$\", \"__page__\": null, \"__rerun_remap_job_id__\": null}",
-            "tool_version": null,
-            "type": "tool",
-            "uuid": "217d92af-f6d6-4fd3-a78a-090d8afd3ae0",
-            "workflow_outputs": [
-                {
-                    "label": "intensities_randomly-imputed_QN_LT",
-                    "output_name": "imputed_data_file",
-                    "uuid": "925d734f-f9d8-49e8-aebb-c8d7598d45b2"
-                },
-                {
-                    "label": "intensities_randomly-imputed_report",
-                    "output_name": "report_file",
-                    "uuid": "4ab5f1b1-d04e-4634-8765-265122bc1064"
-                }
-            ]
-        }
-    },
-    "tags": [
-        "ppenrich"
-    ],
-    "uuid": "c54c2b2e-8080-445c-bc3e-43950c89d4e4",
-    "version": 3
-}
\ No newline at end of file