edger: edger.R comparison

comparison edger.R @ 8:ea85027d986c draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/edger commit 215a0f27f3de87506895ac655f801c40e8c7edbc"

author	iuc
date	Thu, 03 Jun 2021 19:33:47 +0000
parents	7cf6248bc173
children	21891a3214b9

comparison

equal deleted inserted replaced

-:47094543a5ff
+:ea85027d986c
 # ARGS: htmlPath", "R", 1, "character"      -Path to html file linking to other outputs
 #       outPath", "o", 1, "character"       -Path to folder to write all output to
 #       filesPath", "j", 2, "character"     -JSON list object if multiple files input
 #       matrixPath", "m", 2, "character"    -Path to count matrix
 #       factFile", "f", 2, "character"      -Path to factor information file
 #       factInput", "i", 2, "character"     -String containing factors if manually input
 #       annoPath", "a", 2, "character"      -Path to input containing gene annotations
 #       contrastData", "C", 1, "character"  -String containing contrasts of interest
 #       cpmReq", "c", 2, "double"           -Float specifying cpm requirement
 #       cntReq", "z", 2, "integer"          -Integer specifying minimum total count requirement
 #       sampleReq", "s", 2, "integer"       -Integer specifying minimum number of samples that must meet the cpm/count requirement
 #       normCounts", "x", 0, "logical"      -Flag specifying if normalised counts should be output
 #       rdaOpt", "r", 0, "logical"          -Flag specifying if RData should be output
 #       lfcReq", "l", 1, "double"           -Float specifying the log-fold-change requirement
 #       pValReq", "p", 1, "double"          -Float specifying the p-value requirement
 #       pAdjOpt", "d", 1, "character"       -String specifying the p-value adjustment method
 #       normOpt", "n", 1, "character"       -String specifying type of normalisation used
 #       robOpt", "b", 0, "logical"          -Flag specifying if robust options should be used
 #       lrtOpt", "t", 0, "logical"          -Flag specifying whether to perform the LRT test instead of the QL F-test
 #
 # OUT:
 #       MDS Plot
 #       BCV Plot
 #       QL Plot
 #       MD Plot
 #       Expression Table
 #       HTML file linking to the outputs
 #
 # Author: Shian Su - registertonysu@gmail.com - Jan 2014
 # Modified by: Maria Doyle - Oct 2017 (some code taken from the DESeq2 wrapper)
 # Record starting time
-timeStart <- as.character(Sys.time())
+time_start <- as.character(Sys.time())
 # setup R error handling to go to stderr
-options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+options(show.error.messages = F, error = function() {
+cat(geterrmessage(), file = stderr())
+q("no", 1, F)
+})
 # we need that to not crash galaxy with an UTF8 error on German LC settings.
 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 # Load all required libraries
-library(methods, quietly=TRUE, warn.conflicts=FALSE)
+library(methods, quietly = TRUE, warn.conflicts = FALSE)
-library(statmod, quietly=TRUE, warn.conflicts=FALSE)
+library(statmod, quietly = TRUE, warn.conflicts = FALSE)
-library(splines, quietly=TRUE, warn.conflicts=FALSE)
+library(splines, quietly = TRUE, warn.conflicts = FALSE)
-library(edgeR, quietly=TRUE, warn.conflicts=FALSE)
+library(edgeR, quietly = TRUE, warn.conflicts = FALSE)
-library(limma, quietly=TRUE, warn.conflicts=FALSE)
+library(limma, quietly = TRUE, warn.conflicts = FALSE)
-library(scales, quietly=TRUE, warn.conflicts=FALSE)
+library(scales, quietly = TRUE, warn.conflicts = FALSE)
-library(getopt, quietly=TRUE, warn.conflicts=FALSE)
+library(getopt, quietly = TRUE, warn.conflicts = FALSE)
 ################################################################################
 ### Function Declaration
 ################################################################################
 # Function to sanitise contrast equations so there is no whitespace
 # surrounding the arithmetic operators and no leading or trailing whitespace
-sanitiseEquation <- function(equation) {
+sanitise_equation <- function(equation) {
 equation <- gsub(" *[+] *", "+", equation)
 equation <- gsub(" *[-] *", "-", equation)
 equation <- gsub(" *[/] *", "/", equation)
 equation <- gsub(" *[*] *", "*", equation)
 equation <- gsub("^\\s+|\\s+$", "", equation)
 return(equation)
 }
 # Function to sanitise group information
-sanitiseGroups <- function(string) {
+sanitise_groups <- function(string) {
 string <- gsub(" *[,] *", ",", string)
 string <- gsub("^\\s+|\\s+$", "", string)
 return(string)
 }
 # Function to change periods to whitespace in a string
-unmake.names <- function(string) {
+unmake_names <- function(string) {
-string <- gsub(".", " ", string, fixed=TRUE)
+string <- gsub(".", " ", string, fixed = TRUE)
 return(string)
 }
 # Generate output folder and paths
-makeOut <- function(filename) {
+make_out <- function(filename) {
-return(paste0(opt$outPath, "/", filename))
+return(paste0(out_path, "/", filename))
 }
 # Generating design information
-pasteListName <- function(string) {
+paste_listname <- function(string) {
 return(paste0("factors$", string))
 }
 # Create cata function: default path set, default separator empty and appending
 # true by default (Ripped straight from the cat function with altered argument
 # defaults)
-cata <- function(..., file=opt$htmlPath, sep="", fill=FALSE, labels=NULL,
+cata <- function(..., file = opt$htmlPath, sep = "", fill = FALSE, labels = NULL,
-append=TRUE) {
+append = TRUE) {
-if (is.character(file))
+if (is.character(file)) {
-if (file == "")
+if (file == "") {
 file <- stdout()
-else if (substring(file, 1L, 1L) == "|") {
+} else if (substring(file, 1L, 1L) == "|") {
 file <- pipe(substring(file, 2L), "w")
 on.exit(close(file))
 }
 else {
 file <- file(file, ifelse(append, "a", "w"))
 on.exit(close(file))
 }
-.Internal(cat(list(...), file, sep, fill, labels, append))
+}
+.Internal(cat(list(...), file, sep, fill, labels, append))
 }
 # Function to write code for html head and title
-HtmlHead <- function(title) {
+html_head <- function(title) {
 cata("<head>\n")
 cata("<title>", title, "</title>\n")
 cata("</head>\n")
 }
 # Function to write code for html links
-HtmlLink <- function(address, label=address) {
+html_link <- function(address, label = address) {
 cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n")
 }
 # Function to write code for html images
-HtmlImage <- function(source, label=source, height=600, width=600) {
+html_image <- function(source, label = source, height = 600, width = 600) {
 cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height)
 cata("\" width=\"", width, "\"/>\n")
 }
 # Function to write code for html list items
-ListItem <- function(...) {
+list_item <- function(...) {
 cata("<li>", ..., "</li>\n")
 }
-TableItem <- function(...) {
+table_item <- function(...) {
 cata("<td>", ..., "</td>\n")
 }
-TableHeadItem <- function(...) {
+table_head_item <- function(...) {
 cata("<th>", ..., "</th>\n")
 }
 ################################################################################
 ### Input Processing
 ################################################################################
 # Collect arguments from command line
-args <- commandArgs(trailingOnly=TRUE)
+args <- commandArgs(trailingOnly = TRUE)
 # Get options, using the spec as defined by the enclosed list.
 # Read the options from the default: commandArgs(TRUE).
 spec <- matrix(c(
 "htmlPath", "R", 1, "character",
 "outPath", "o", 1, "character",
 "filesPath", "j", 2, "character",
 "matrixPath", "m", 2, "character",
 "factFile", "f", 2, "character",
 "factInput", "i", 2, "character",
 "annoPath", "a", 2, "character",
 "contrastData", "C", 1, "character",
 "cpmReq", "c", 1, "double",
 "totReq", "y", 0, "logical",
 "cntReq", "z", 1, "integer",
 "sampleReq", "s", 1, "integer",
 "normCounts", "x", 0, "logical",
 "rdaOpt", "r", 0, "logical",
 "lfcReq", "l", 1, "double",
 "pValReq", "p", 1, "double",
 "pAdjOpt", "d", 1, "character",
 "normOpt", "n", 1, "character",
 "robOpt", "b", 0, "logical",
-"lrtOpt", "t", 0, "logical"),
+"lrtOpt", "t", 0, "logical"
-byrow=TRUE, ncol=4)
+),
+byrow = TRUE, ncol = 4
+)
 opt <- getopt(spec)
 if (is.null(opt$matrixPath) & is.null(opt$filesPath)) {
 cat("A counts matrix (or a set of counts files) is required.\n")
-q(status=1)
+q(status = 1)
 }
 if (is.null(opt$cpmReq)) {
-filtCPM <- FALSE
+filt_cpm <- FALSE
 } else {
-filtCPM <- TRUE
+filt_cpm <- TRUE
 }
 if (is.null(opt$cntReq) || is.null(opt$sampleReq)) {
-filtSmpCount <- FALSE
+filt_smpcount <- FALSE
 } else {
-filtSmpCount <- TRUE
+filt_smpcount <- TRUE
 }
 if (is.null(opt$totReq)) {
-filtTotCount <- FALSE
+filt_totcount <- FALSE
 } else {
-filtTotCount <- TRUE
+filt_totcount <- TRUE
 }
 if (is.null(opt$lrtOpt)) {
-wantLRT <- FALSE
+want_lrt <- FALSE
 } else {
-wantLRT <- TRUE
+want_lrt <- TRUE
 }
 if (is.null(opt$rdaOpt)) {
-wantRda <- FALSE
+want_rda <- FALSE
 } else {
-wantRda <- TRUE
+want_rda <- TRUE
 }
 if (is.null(opt$annoPath)) {
-haveAnno <- FALSE
+have_anno <- FALSE
 } else {
-haveAnno <- TRUE
+have_anno <- TRUE
 }
 if (is.null(opt$normCounts)) {
-wantNorm <- FALSE
+want_norm <- FALSE
 } else {
-wantNorm <- TRUE
+want_norm <- TRUE
 }
 if (is.null(opt$robOpt)) {
-wantRobust <- FALSE
+want_robust <- FALSE
 } else {
-wantRobust <- TRUE
+want_robust <- TRUE
 }
 if (!is.null(opt$filesPath)) {
 # Process the separate count files (adapted from DESeq2 wrapper)
 library("rjson")
 parser <- newJSONParser()
 parser$addData(opt$filesPath)
-factorList <- parser$getObject()
+factor_list <- parser$getObject()
-factors <- sapply(factorList, function(x) x[[1]])
+factors <- sapply(factor_list, function(x) x[[1]])
-filenamesIn <- unname(unlist(factorList[[1]][[2]]))
+filenames_in <- unname(unlist(factor_list[[1]][[2]]))
-sampleTable <- data.frame(sample=basename(filenamesIn),
+sampletable <- data.frame(
-filename=filenamesIn,
+sample = basename(filenames_in),
-row.names=filenamesIn,
+filename = filenames_in,
-stringsAsFactors=FALSE)
+row.names = filenames_in,
-for (factor in factorList) {
+stringsAsFactors = FALSE
-factorName <- factor[[1]]
+)
-sampleTable[[factorName]] <- character(nrow(sampleTable))
+for (factor in factor_list) {
-lvls <- sapply(factor[[2]], function(x) names(x))
+factorname  <- factor[[1]]
-for (i in seq_along(factor[[2]])) {
+sampletable[[factorname]] <- character(nrow(sampletable))
-files <- factor[[2]][[i]][[1]]
+lvls <- sapply(factor[[2]], function(x) names(x))
-sampleTable[files,factorName] <- lvls[i]
+for (i in seq_along(factor[[2]])) {
-}
+files <- factor[[2]][[i]][[1]]
-sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls)
+sampletable[files, factorname] <- lvls[i]
 }
-rownames(sampleTable) <- sampleTable$sample
+sampletable[[factorname]] <- factor(sampletable[[factorname]], levels = lvls)
-rem <- c("sample","filename")
+}
-factors <- sampleTable[, !(names(sampleTable) %in% rem), drop=FALSE]
+rownames(sampletable) <- sampletable$sample
+rem <- c("sample", "filename")
-#read in count files and create single table
+factors <- sampletable[, !(names(sampletable) %in% rem), drop = FALSE]
-countfiles <- lapply(sampleTable$filename, function(x){read.delim(x, row.names=1)})
-counts <- do.call("cbind", countfiles)
+# read in count files and create single table
+countfiles <- lapply(sampletable$filename, function(x) {
-} else {
+read.delim(x, row.names = 1)
-# Process the single count matrix
+})
-counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", strip.white=TRUE, stringsAsFactors=FALSE)
+counts <- do.call("cbind", countfiles)
-row.names(counts) <- counts[, 1]
+} else {
-counts <- counts[ , -1]
+# Process the single count matrix
-countsRows <- nrow(counts)
+counts <- read.table(opt$matrixPath, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = FALSE)
+row.names(counts) <- counts[, 1]
-# Process factors
+counts <- counts[, -1]
-if (is.null(opt$factInput)) {
+countsrows <- nrow(counts)
-factorData <- read.table(opt$factFile, header=TRUE, sep="\t", strip.white=TRUE)
-# check samples names match
+# Process factors
-if(!any(factorData[, 1] %in% colnames(counts)))
+if (is.null(opt$factInput)) {
-stop("Sample IDs in factors file and count matrix don't match")
+factordata <- read.table(opt$factFile, header = TRUE, sep = "\t", strip.white = TRUE)
-# order samples as in counts matrix
+# check samples names match
-factorData <- factorData[match(colnames(counts), factorData[, 1]), ]
+if (!any(factordata[, 1] %in% colnames(counts))) {
-factors <- factorData[, -1, drop=FALSE]
+stop("Sample IDs in factors file and count matrix don't match")
-}  else {
-factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE))
-factorData <- list()
-for (fact in factors) {
-newFact <- unlist(strsplit(fact, split="::"))
-factorData <- rbind(factorData, newFact)
-} # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
-# Set the row names to be the name of the factor and delete first row
-row.names(factorData) <- factorData[, 1]
-factorData <- factorData[, -1]
-factorData <- sapply(factorData, sanitiseGroups)
-factorData <- sapply(factorData, strsplit, split=",")
-factorData <- sapply(factorData, make.names)
-# Transform factor data into data frame of R factor objects
-factors <- data.frame(factorData)
 }
-}
+# order samples as in counts matrix
+factordata <- factordata[match(colnames(counts), factordata[, 1]), ]
-# if annotation file provided
+factors <- factordata[, -1, drop = FALSE]
-if (haveAnno) {
+} else {
-geneanno <- read.table(opt$annoPath, header=TRUE, sep="\t", quote= "", strip.white=TRUE, stringsAsFactors=FALSE)
+factors <- unlist(strsplit(opt$factInput, "|", fixed = TRUE))
-}
+factordata <- list()
+for (fact in factors) {
-#Create output directory
+newfact <- unlist(strsplit(fact, split = "::"))
-dir.create(opt$outPath, showWarnings=FALSE)
+factordata <- rbind(factordata, newfact)
+} # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
+# Set the row names to be the name of the factor and delete first column
+row.names(factordata) <- factordata[, 1]
+factordata <- factordata[, -1]
+factordata <- sapply(factordata, sanitise_groups)
+factordata <- sapply(factordata, strsplit, split = ",")
+factordata <- sapply(factordata, make.names)
+# Transform factor data into data frame of R factor objects
+factors <- data.frame(factordata)
+}
+}
+# if annotation file provided
+if (have_anno) {
+geneanno <- read.table(opt$annoPath, header = TRUE, sep = "\t", quote = "", strip.white = TRUE, stringsAsFactors = FALSE)
+}
+# Create output directory
+out_path <- opt$outPath
+dir.create(out_path, showWarnings = FALSE)
 # Split up contrasts separated by comma into a vector then sanitise
-contrastData <- unlist(strsplit(opt$contrastData, split=","))
+contrast_data <- unlist(strsplit(opt$contrastData, split = ","))
-contrastData <- sanitiseEquation(contrastData)
+contrast_data <- sanitise_equation(contrast_data)
-contrastData <- gsub(" ", ".", contrastData, fixed=TRUE)
+contrast_data <- gsub(" ", ".", contrast_data, fixed = TRUE)
-bcvOutPdf <- makeOut("bcvplot.pdf")
+bcv_pdf <- make_out("bcvplot.pdf")
-bcvOutPng <- makeOut("bcvplot.png")
+bcv_png <- make_out("bcvplot.png")
-qlOutPdf <- makeOut("qlplot.pdf")
+ql_pdf <- make_out("qlplot.pdf")
-qlOutPng <- makeOut("qlplot.png")
+ql_png <- make_out("qlplot.png")
-mdsOutPdf <- character()   # Initialise character vector
+mds_pdf <- character() # Initialise character vector
-mdsOutPng <- character()
+mds_png <- character()
-for (i in 1:ncol(factors)) {
+for (i in seq_len(ncol(factors))) {
-mdsOutPdf[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".pdf"))
+mds_pdf[i] <- make_out(paste0("mdsplot_", names(factors)[i], ".pdf"))
-mdsOutPng[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".png"))
+mds_png[i] <- make_out(paste0("mdsplot_", names(factors)[i], ".png"))
 }
-mdOutPdf <- character()
+md_pdf <- character()
-mdOutPng <- character()
+md_png <- character()
-topOut <- character()
+top_out <- character()
-for (i in 1:length(contrastData)) {
+for (i in seq_along(contrast_data)) {
-mdOutPdf[i] <- makeOut(paste0("mdplot_", contrastData[i], ".pdf"))
+md_pdf[i] <- make_out(paste0("mdplot_", contrast_data[i], ".pdf"))
-mdOutPng[i] <- makeOut(paste0("mdplot_", contrastData[i], ".png"))
+md_png[i] <- make_out(paste0("mdplot_", contrast_data[i], ".png"))
-topOut[i] <- makeOut(paste0("edgeR_", contrastData[i], ".tsv"))
+top_out[i] <- make_out(paste0("edgeR_", contrast_data[i], ".tsv"))
-}   # Save output paths for each contrast as vectors
+} # Save output paths for each contrast as vectors
-normOut <- makeOut("edgeR_normcounts.tsv")
+norm_out <- make_out("edgeR_normcounts.tsv")
-rdaOut <- makeOut("edgeR_analysis.RData")
+rda_out <- make_out("edgeR_analysis.RData")
-sessionOut <- makeOut("session_info.txt")
+session_out <- make_out("session_info.txt")
 # Initialise data for html links and images, data frame with columns Label and
 # Link
-linkData <- data.frame(Label=character(), Link=character(), stringsAsFactors=FALSE)
+link_data <- data.frame(Label = character(), Link = character(), stringsAsFactors = FALSE)
-imageData <- data.frame(Label=character(), Link=character(), stringsAsFactors=FALSE)
+image_data <- data.frame(Label = character(), Link = character(), stringsAsFactors = FALSE)
 # Initialise vectors for storage of up/down/neutral regulated counts
-upCount <- numeric()
+up_count <- numeric()
-downCount <- numeric()
+down_count <- numeric()
-flatCount <- numeric()
+flat_count <- numeric()
 ################################################################################
 ### Data Processing
 ################################################################################
 # Extract counts and annotation data
 data <- list()
 data$counts <- counts
-if (haveAnno) {
+if (have_anno) {
 # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno)
-annoord <- geneanno[match(row.names(counts), geneanno[,1]), ]
+annoord <- geneanno[match(row.names(counts), geneanno[, 1]), ]
 data$genes <- annoord
 } else {
-data$genes <- data.frame(GeneID=row.names(counts))
+data$genes <- data.frame(GeneID = row.names(counts))
 }
 # If filter criteria set, filter out genes that do not have a required cpm/counts in a required number of
 # samples. Default is no filtering
-preFilterCount <- nrow(data$counts)
+prefilter_count <- nrow(data$counts)
-if (filtCPM || filtSmpCount || filtTotCount) {
+if (filt_cpm || filt_smpcount || filt_totcount) {
+if (filt_totcount) {
-if (filtTotCount) {
+keep <- rowSums(data$counts) >= opt$cntReq
-keep <- rowSums(data$counts) >= opt$cntReq
+} else if (filt_smpcount) {
-} else if (filtSmpCount) {
+keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
-keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
+} else if (filt_cpm) {
-} else if (filtCPM) {
+keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq
-keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq
+}
-}
+data$counts <- data$counts[keep, ]
-data$counts <- data$counts[keep, ]
+data$genes <- data$genes[keep, , drop = FALSE]
-data$genes <- data$genes[keep, , drop=FALSE]
+}
-}
+postfilter_count <- nrow(data$counts)
-postFilterCount <- nrow(data$counts)
+filtered_count <- prefilter_count - postfilter_count
-filteredCount <- preFilterCount-postFilterCount
-# Creating naming data
-samplenames <- colnames(data$counts)
-sampleanno <- data.frame("sampleID"=samplenames, factors)
-# Generating the DGEList object "data"
-data$samples <- sampleanno
-data$samples$lib.size <- colSums(data$counts)
-data$samples$norm.factors <- 1
-row.names(data$samples) <- colnames(data$counts)
-data <- new("DGEList", data)
 # Name rows of factors according to their sample
 row.names(factors) <- names(data$counts)
-factorList <- sapply(names(factors), pasteListName)
+factor_list <- sapply(names(factors), paste_listname)
-formula <- "~0"
+# Generating the DGEList object "data"
-for (i in 1:length(factorList)) {
+samplenames <- colnames(data$counts)
-formula <- paste(formula, factorList[i], sep="+")
+genes <- data$genes
+data <- DGEList(data$counts)
+colnames(data) <- samplenames
+data$samples <- factors
+data$genes <- genes
+formula <- "~0"
+for (i in seq_along(factor_list)) {
+formula <- paste(formula, factor_list[i], sep = "+")
 }
 formula <- formula(formula)
 design <- model.matrix(formula)
-for (i in 1:length(factorList)) {
+for (i in seq_along(factor_list)) {
-colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE)
+colnames(design) <- gsub(factor_list[i], "", colnames(design), fixed = TRUE)
 }
 # Calculating normalising factor, estimating dispersion
-data <- calcNormFactors(data, method=opt$normOpt)
+data <- calcNormFactors(data, method = opt$normOpt)
-if (wantRobust) {
+if (want_robust) {
-data <- estimateDisp(data, design=design, robust=TRUE)
+data <- estimateDisp(data, design = design, robust = TRUE)
 } else {
-data <- estimateDisp(data, design=design)
+data <- estimateDisp(data, design = design)
 }
 # Generate contrasts information
-contrasts <- makeContrasts(contrasts=contrastData, levels=design)
+contrasts <- makeContrasts(contrasts = contrast_data, levels = design)
 ################################################################################
 ### Data Output
 ################################################################################
 # Plot MDS
 labels <- names(counts)
 # MDS plot
-png(mdsOutPng, width=600, height=600)
+png(mds_png, width = 600, height = 600)
-plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main=paste("MDS Plot:", names(factors)[1]))
+plotMDS(data, labels = labels, col = as.numeric(factors[, 1]), cex = 0.8, main = paste("MDS Plot:", names(factors)[1]))
-imgName <- paste0("MDS Plot_", names(factors)[1], ".png")
+img_name <- paste0("MDS Plot_", names(factors)[1], ".png")
-imgAddr <- paste0("mdsplot_", names(factors)[1], ".png")
+img_addr <- paste0("mdsplot_", names(factors)[1], ".png")
-imageData[1, ] <- c(imgName, imgAddr)
+image_data[1, ] <- c(img_name, img_addr)
 invisible(dev.off())
-pdf(mdsOutPdf)
+pdf(mds_pdf)
-plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main=paste("MDS Plot:", names(factors)[1]))
+plotMDS(data, labels = labels, col = as.numeric(factors[, 1]), cex = 0.8, main = paste("MDS Plot:", names(factors)[1]))
-linkName <- paste0("MDS Plot_", names(factors)[1], ".pdf")
+link_name <- paste0("MDS Plot_", names(factors)[1], ".pdf")
-linkAddr <- paste0("mdsplot_", names(factors)[1], ".pdf")
+link_addr <- paste0("mdsplot_", names(factors)[1], ".pdf")
-linkData[1, ] <- c(linkName, linkAddr)
+link_data[1, ] <- c(link_name, link_addr)
 invisible(dev.off())
 # If additional factors create additional MDS plots coloured by factor
 if (ncol(factors) > 1) {
 for (i in 2:ncol(factors)) {
-png(mdsOutPng[i], width=600, height=600)
+png(mds_png[i], width = 600, height = 600)
-plotMDS(data, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i]))
+plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
-imgName <- paste0("MDS Plot_", names(factors)[i], ".png")
+img_name <- paste0("MDS Plot_", names(factors)[i], ".png")
-imgAddr <- paste0("mdsplot_", names(factors)[i], ".png")
+img_addr <- paste0("mdsplot_", names(factors)[i], ".png")
-imageData <- rbind(imageData, c(imgName, imgAddr))
+image_data <- rbind(image_data, c(img_name, img_addr))
 invisible(dev.off())
-pdf(mdsOutPdf[i])
+pdf(mds_pdf[i])
-plotMDS(data, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i]))
+plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
-linkName <- paste0("MDS Plot_", names(factors)[i], ".pdf")
+link_name <- paste0("MDS Plot_", names(factors)[i], ".pdf")
-linkAddr <- paste0("mdsplot_", names(factors)[i], ".pdf")
+link_addr <- paste0("mdsplot_", names(factors)[i], ".pdf")
-linkData <- rbind(linkData, c(linkName, linkAddr))
+link_data <- rbind(link_data, c(link_name, link_addr))
 invisible(dev.off())
 }
 }
 # BCV Plot
-png(bcvOutPng, width=600, height=600)
+png(bcv_png, width = 600, height = 600)
-plotBCV(data, main="BCV Plot")
+plotBCV(data, main = "BCV Plot")
-imgName <- "BCV Plot"
+img_name <- "BCV Plot"
-imgAddr <- "bcvplot.png"
+img_addr <- "bcvplot.png"
-imageData <- rbind(imageData, c(imgName, imgAddr))
+image_data <- rbind(image_data, c(img_name, img_addr))
 invisible(dev.off())
-pdf(bcvOutPdf)
+pdf(bcv_pdf)
-plotBCV(data, main="BCV Plot")
+plotBCV(data, main = "BCV Plot")
-linkName <- paste0("BCV Plot.pdf")
+link_name <- paste0("BCV Plot.pdf")
-linkAddr <- paste0("bcvplot.pdf")
+link_addr <- paste0("bcvplot.pdf")
-linkData <- rbind(linkData, c(linkName, linkAddr))
+link_data <- rbind(link_data, c(link_name, link_addr))
 invisible(dev.off())
 # Generate fit
-if (wantLRT) {
+if (want_lrt) {
+fit <- glmFit(data, design)
-fit <- glmFit(data, design)
+} else {
+if (want_robust) {
-} else {
+fit <- glmQLFit(data, design, robust = TRUE)
+} else {
-if (wantRobust) {
+fit <- glmQLFit(data, design)
-fit <- glmQLFit(data, design, robust=TRUE)
+}
-} else {
-fit <- glmQLFit(data, design)
+# Plot QL dispersions
-}
+png(ql_png, width = 600, height = 600)
+plotQLDisp(fit, main = "QL Plot")
-# Plot QL dispersions
+img_name <- "QL Plot"
-png(qlOutPng, width=600, height=600)
+img_addr <- "qlplot.png"
-plotQLDisp(fit, main="QL Plot")
+image_data <- rbind(image_data, c(img_name, img_addr))
-imgName <- "QL Plot"
+invisible(dev.off())
-imgAddr <- "qlplot.png"
-imageData <- rbind(imageData, c(imgName, imgAddr))
+pdf(ql_pdf)
-invisible(dev.off())
+plotQLDisp(fit, main = "QL Plot")
+link_name <- "QL Plot.pdf"
-pdf(qlOutPdf)
+link_addr <- "qlplot.pdf"
-plotQLDisp(fit, main="QL Plot")
+link_data <- rbind(link_data, c(link_name, link_addr))
-linkName <- "QL Plot.pdf"
+invisible(dev.off())
-linkAddr <- "qlplot.pdf"
+}
-linkData <- rbind(linkData, c(linkName, linkAddr))
-invisible(dev.off())
+# Save normalised counts (log2cpm)
-}
+if (want_norm) {
+normalised_counts <- cpm(data, normalized.lib.sizes = TRUE, log = TRUE)
-# Save normalised counts (log2cpm)
+normalised_counts <- data.frame(data$genes, normalised_counts)
-if (wantNorm) {
+write.table(normalised_counts, file = norm_out, row.names = FALSE, sep = "\t", quote = FALSE)
-normalisedCounts <- cpm(data, normalized.lib.sizes=TRUE, log=TRUE)
+link_data <- rbind(link_data, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv"))
-normalisedCounts <- data.frame(data$genes, normalisedCounts)
+}
-write.table (normalisedCounts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE)
-linkData <- rbind(linkData, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv"))
-}
+for (i in seq_along(contrast_data)) {
+if (want_lrt) {
+res <- glmLRT(fit, contrast = contrasts[, i])
-for (i in 1:length(contrastData)) {
+} else {
-if (wantLRT) {
+res <- glmQLFTest(fit, contrast = contrasts[, i])
-res <- glmLRT(fit, contrast=contrasts[, i])
+}
-} else {
-res <- glmQLFTest(fit, contrast=contrasts[, i])
+status <- decideTestsDGE(res,
-}
+adjust.method = opt$pAdjOpt, p.value = opt$pValReq,
+lfc = opt$lfcReq
-status = decideTestsDGE(res, adjust.method=opt$pAdjOpt, p.value=opt$pValReq,
+)
-lfc=opt$lfcReq)
+sum_status <- summary(status)
-sumStatus <- summary(status)
+# Collect counts for differential expression
-# Collect counts for differential expression
+up_count[i] <- sum_status["Up", ]
-upCount[i] <- sumStatus["Up", ]
+down_count[i] <- sum_status["Down", ]
-downCount[i] <- sumStatus["Down", ]
+flat_count[i] <- sum_status["NotSig", ]
-flatCount[i] <- sumStatus["NotSig", ]
+# Write top expressions table
-# Write top expressions table
+top <- topTags(res, adjust.method = opt$pAdjOpt, n = Inf, sort.by = "PValue")
-top <- topTags(res, adjust.method=opt$pAdjOpt, n=Inf, sort.by="PValue")
+write.table(top, file = top_out[i], row.names = FALSE, sep = "\t", quote = FALSE)
-write.table(top, file=topOut[i], row.names=FALSE, sep="\t", quote=FALSE)
+link_name <- paste0("edgeR_", contrast_data[i], ".tsv")
-linkName <- paste0("edgeR_", contrastData[i], ".tsv")
+link_addr <- paste0("edgeR_", contrast_data[i], ".tsv")
-linkAddr <- paste0("edgeR_", contrastData[i], ".tsv")
+link_data <- rbind(link_data, c(link_name, link_addr))
-linkData <- rbind(linkData, c(linkName, linkAddr))
+# Plot MD (log ratios vs mean difference) using limma package
-# Plot MD (log ratios vs mean difference) using limma package
+pdf(md_pdf[i])
-pdf(mdOutPdf[i])
+limma::plotMD(res,
-limma::plotMD(res, status=status,
+status = status,
-main=paste("MD Plot:", unmake.names(contrastData[i])),
+main = paste("MD Plot:", unmake_names(contrast_data[i])),
-hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
+hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1),
-xlab="Average Expression", ylab="logFC")
+xlab = "Average Expression", ylab = "logFC"
+)
-abline(h=0, col="grey", lty=2)
+abline(h = 0, col = "grey", lty = 2)
-linkName <- paste0("MD Plot_", contrastData[i], ".pdf")
-linkAddr <- paste0("mdplot_", contrastData[i], ".pdf")
+link_name <- paste0("MD Plot_", contrast_data[i], ".pdf")
-linkData <- rbind(linkData, c(linkName, linkAddr))
+link_addr <- paste0("mdplot_", contrast_data[i], ".pdf")
-invisible(dev.off())
+link_data <- rbind(link_data, c(link_name, link_addr))
+invisible(dev.off())
-png(mdOutPng[i], height=600, width=600)
-limma::plotMD(res, status=status,
+png(md_png[i], height = 600, width = 600)
-main=paste("MD Plot:", unmake.names(contrastData[i])),
+limma::plotMD(res,
-hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
+status = status,
-xlab="Average Expression", ylab="logFC")
+main = paste("MD Plot:", unmake_names(contrast_data[i])),
+hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1),
-abline(h=0, col="grey", lty=2)
+xlab = "Average Expression", ylab = "logFC"
+)
-imgName <- paste0("MD Plot_", contrastData[i], ".png")
-imgAddr <- paste0("mdplot_", contrastData[i], ".png")
+abline(h = 0, col = "grey", lty = 2)
-imageData <- rbind(imageData, c(imgName, imgAddr))
-invisible(dev.off())
+img_name <- paste0("MD Plot_", contrast_data[i], ".png")
-}
+img_addr <- paste0("mdplot_", contrast_data[i], ".png")
-sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount)
+image_data <- rbind(image_data, c(img_name, img_addr))
-row.names(sigDiff) <- contrastData
+invisible(dev.off())
+}
+sig_diff <- data.frame(Up = up_count, Flat = flat_count, Down = down_count)
+row.names(sig_diff) <- contrast_data
 # Save relevant items as rda object
-if (wantRda) {
+if (want_rda) {
-if (wantNorm) {
+if (want_norm) {
-save(counts, data, status, normalisedCounts, labels, factors, fit, res, top, contrasts, design,
+save(counts, data, status, normalised_counts, labels, factors, fit, res, top, contrasts, design,
-file=rdaOut, ascii=TRUE)
+file = rda_out, ascii = TRUE
-} else {
+)
-save(counts, data, status, labels, factors, fit, res, top, contrasts, design,
+} else {
-file=rdaOut, ascii=TRUE)
+save(counts, data, status, labels, factors, fit, res, top, contrasts, design,
-}
+file = rda_out, ascii = TRUE
-linkData <- rbind(linkData, c("edgeR_analysis.RData", "edgeR_analysis.RData"))
+)
+}
+link_data <- rbind(link_data, c("edgeR_analysis.RData", "edgeR_analysis.RData"))
 }
 # Record session info
-writeLines(capture.output(sessionInfo()), sessionOut)
+writeLines(capture.output(sessionInfo()), session_out)
-linkData <- rbind(linkData, c("Session Info", "session_info.txt"))
+link_data <- rbind(link_data, c("Session Info", "session_info.txt"))
 # Record ending time and calculate total run time
-timeEnd <- as.character(Sys.time())
+time_end <- as.character(Sys.time())
-timeTaken <- capture.output(round(difftime(timeEnd, timeStart), digits=3))
+time_taken <- capture.output(round(difftime(time_end, time_start), digits = 3))
-timeTaken <- gsub("Time difference of ", "", timeTaken, fixed=TRUE)
+time_taken <- gsub("Time difference of ", "", time_taken, fixed = TRUE)
 ################################################################################
 ### HTML Generation
 ################################################################################
 # Clear file
-cat("", file=opt$htmlPath)
+cat("", file = opt$htmlPath)
 cata("<html>\n")
 cata("<body>\n")
 cata("<h3>edgeR Analysis Output:</h3>\n")
 cata("Links to PDF copies of plots are in 'Plots' section below.<br />\n")
-HtmlImage(imageData$Link[1], imageData$Label[1])
+html_image(image_data$Link[1], image_data$Label[1])
-for (i in 2:nrow(imageData)) {
+for (i in 2:nrow(image_data)) {
-HtmlImage(imageData$Link[i], imageData$Label[i])
+html_image(image_data$Link[i], image_data$Label[i])
 }
 cata("<h4>Differential Expression Counts:</h4>\n")
 cata("<table border=\"1\" cellpadding=\"4\">\n")
 cata("<tr>\n")
-TableItem()
+table_item()
-for (i in colnames(sigDiff)) {
+for (i in colnames(sig_diff)) {
-TableHeadItem(i)
+table_head_item(i)
 }
 cata("</tr>\n")
-for (i in 1:nrow(sigDiff)) {
+for (i in seq_len(nrow(sig_diff))) {
 cata("<tr>\n")
-TableHeadItem(unmake.names(row.names(sigDiff)[i]))
+table_head_item(unmake_names(row.names(sig_diff)[i]))
-for (j in 1:ncol(sigDiff)) {
+for (j in seq_len(ncol(sig_diff))) {
-TableItem(as.character(sigDiff[i, j]))
+table_item(as.character(sig_diff[i, j]))
+}
+cata("</tr>\n")
+}
+cata("</table>")
+cata("<h4>Plots:</h4>\n")
+for (i in seq_len(nrow(link_data))) {
+if (grepl(".pdf", link_data$Link[i])) {
+html_link(link_data$Link[i], link_data$Label[i])
+}
+}
+cata("<h4>Tables:</h4>\n")
+for (i in seq_len(nrow(link_data))) {
+if (grepl(".tsv", link_data$Link[i])) {
+html_link(link_data$Link[i], link_data$Label[i])
+}
+}
+if (want_rda) {
+cata("<h4>R Data Objects:</h4>\n")
+for (i in seq_len(nrow(link_data))) {
+if (grepl(".RData", link_data$Link[i])) {
+html_link(link_data$Link[i], link_data$Label[i])
 }
-cata("</tr>\n")
+}
-}
-cata("</table>")
-cata("<h4>Plots:</h4>\n")
-for (i in 1:nrow(linkData)) {
-if (grepl(".pdf", linkData$Link[i])) {
-HtmlLink(linkData$Link[i], linkData$Label[i])
-}
-}
-cata("<h4>Tables:</h4>\n")
-for (i in 1:nrow(linkData)) {
-if (grepl(".tsv", linkData$Link[i])) {
-HtmlLink(linkData$Link[i], linkData$Label[i])
-}
-}
-if (wantRda) {
-cata("<h4>R Data Objects:</h4>\n")
-for (i in 1:nrow(linkData)) {
-if (grepl(".RData", linkData$Link[i])) {
-HtmlLink(linkData$Link[i], linkData$Label[i])
-}
-}
 }
 cata("<p>Alt-click links to download file.</p>\n")
 cata("<p>Click floppy disc icon associated history item to download ")
 cata("all files.</p>\n")
 cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n")
 cata("<h4>Additional Information</h4>\n")
 cata("<ul>\n")
-if (filtCPM || filtSmpCount || filtTotCount) {
+if (filt_cpm || filt_smpcount || filt_totcount) {
-if (filtCPM) {
+if (filt_cpm) {
-tempStr <- paste("Genes without more than", opt$cpmReq,
+temp_str <- paste(
-"CPM in at least", opt$sampleReq, "samples are insignificant",
+"Genes without more than", opt$cpmReq,
-"and filtered out.")
+"CPM in at least", opt$sampleReq, "samples are insignificant",
-} else if (filtSmpCount) {
+"and filtered out."
-tempStr <- paste("Genes without more than", opt$cntReq,
+)
-"counts in at least", opt$sampleReq, "samples are insignificant",
+} else if (filt_smpcount) {
-"and filtered out.")
+temp_str <- paste(
-} else if (filtTotCount) {
+"Genes without more than", opt$cntReq,
-tempStr <- paste("Genes without more than", opt$cntReq,
+"counts in at least", opt$sampleReq, "samples are insignificant",
-"counts, after summing counts for all samples, are insignificant",
+"and filtered out."
-"and filtered out.")
+)
-}
+} else if (filt_totcount) {
+temp_str <- paste(
-ListItem(tempStr)
+"Genes without more than", opt$cntReq,
-filterProp <- round(filteredCount/preFilterCount*100, digits=2)
+"counts, after summing counts for all samples, are insignificant",
-tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp,
+"and filtered out."
-"%) genes were filtered out for low expression.")
+)
-ListItem(tempStr)
+}
-}
-ListItem(opt$normOpt, " was the method used to normalise library sizes.")
+list_item(temp_str)
-if (wantLRT) {
+filter_prop <- round(filtered_count / prefilter_count * 100, digits = 2)
-ListItem("The edgeR likelihood ratio test was used.")
+temp_str <- paste0(
-} else {
+filtered_count, " of ", prefilter_count, " (", filter_prop,
-if (wantRobust) {
+"%) genes were filtered out for low expression."
-ListItem("The edgeR quasi-likelihood test was used with robust settings (robust=TRUE with estimateDisp and glmQLFit).")
+)
-} else {
+list_item(temp_str)
-ListItem("The edgeR quasi-likelihood test was used.")
+}
-}
+list_item(opt$normOpt, " was the method used to normalise library sizes.")
-}
+if (want_lrt) {
-if (opt$pAdjOpt!="none") {
+list_item("The edgeR likelihood ratio test was used.")
-if (opt$pAdjOpt=="BH" || opt$pAdjOpt=="BY") {
+} else {
-tempStr <- paste0("MD-Plot highlighted genes are significant at FDR ",
+if (want_robust) {
-"of ", opt$pValReq," and exhibit log2-fold-change of at ",
+list_item("The edgeR quasi-likelihood test was used with robust settings (robust=TRUE with estimateDisp and glmQLFit).")
-"least ", opt$lfcReq, ".")
+} else {
-ListItem(tempStr)
+list_item("The edgeR quasi-likelihood test was used.")
-} else if (opt$pAdjOpt=="holm") {
+}
-tempStr <- paste0("MD-Plot highlighted genes are significant at adjusted ",
+}
-"p-value of ", opt$pValReq,"  by the Holm(1979) ",
+if (opt$pAdjOpt != "none") {
-"method, and exhibit log2-fold-change of at least ",
+if (opt$pAdjOpt == "BH" || opt$pAdjOpt == "BY") {
-opt$lfcReq, ".")
+temp_str <- paste0(
-ListItem(tempStr)
+"MD-Plot highlighted genes are significant at FDR ",
-}
+"of ", opt$pValReq, " and exhibit log2-fold-change of at ",
-} else {
+"least ", opt$lfcReq, "."
-tempStr <- paste0("MD-Plot highlighted genes are significant at p-value ",
+)
-"of ", opt$pValReq," and exhibit log2-fold-change of at ",
+list_item(temp_str)
-"least ", opt$lfcReq, ".")
+} else if (opt$pAdjOpt == "holm") {
-ListItem(tempStr)
+temp_str <- paste0(
+"MD-Plot highlighted genes are significant at adjusted ",
+"p-value of ", opt$pValReq, "  by the Holm(1979) ",
+"method, and exhibit log2-fold-change of at least ",
+opt$lfcReq, "."
+)
+list_item(temp_str)
+}
+} else {
+temp_str <- paste0(
+"MD-Plot highlighted genes are significant at p-value ",
+"of ", opt$pValReq, " and exhibit log2-fold-change of at ",
+"least ", opt$lfcReq, "."
+)
+list_item(temp_str)
 }
 cata("</ul>\n")
 cata("<h4>Summary of experimental data:</h4>\n")
 cata("<p>*CHECK THAT SAMPLES ARE ASSOCIATED WITH CORRECT GROUP(S)*</p>\n")
 cata("<table border=\"1\" cellpadding=\"3\">\n")
 cata("<tr>\n")
-TableHeadItem("SampleID")
+table_head_item("SampleID")
-TableHeadItem(names(factors)[1], " (Primary Factor)")
+table_head_item(names(factors)[1], " (Primary Factor)")
 if (ncol(factors) > 1) {
 for (i in names(factors)[2:length(names(factors))]) {
-TableHeadItem(i)
+table_head_item(i)
 }
 cata("</tr>\n")
 }
-for (i in 1:nrow(factors)) {
+for (i in seq_len(nrow((factors)))) {
 cata("<tr>\n")
-TableHeadItem(row.names(factors)[i])
+table_head_item(row.names(factors)[i])
-for (j in 1:ncol(factors)) {
+for (j in seq_len(ncol(factors))) {
-TableItem(as.character(unmake.names(factors[i, j])))
+table_item(as.character(unmake_names(factors[i, j])))
 }
 cata("</tr>\n")
 }
 cata("</table>")
-for (i in 1:nrow(linkData)) {
+for (i in seq_len(nrow(link_data))) {
-if (grepl("session_info", linkData$Link[i])) {
+if (grepl("session_info", link_data$Link[i])) {
-HtmlLink(linkData$Link[i], linkData$Label[i])
+html_link(link_data$Link[i], link_data$Label[i])
 }
 }
 cata("<table border=\"0\">\n")
 cata("<tr>\n")
-TableItem("Task started at:"); TableItem(timeStart)
+table_item("Task started at:")
+table_item(time_start)
 cata("</tr>\n")
 cata("<tr>\n")
-TableItem("Task ended at:"); TableItem(timeEnd)
+table_item("Task ended at:")
+table_item(time_end)
 cata("</tr>\n")
 cata("<tr>\n")
-TableItem("Task run time:"); TableItem(timeTaken)
+table_item("Task run time:")
+table_item(time_taken)
 cata("<tr>\n")
 cata("</table>\n")
 cata("</body>\n")
 cata("</html>")

Mercurial > repos > iuc > edger

comparison edger.R @ 8:ea85027d986c draft