Mercurial > repos > iuc > deseq2
comparison deseq2.R @ 27:dc6bc19f05ab draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 83eb5b2665d87c02b270596f8175499e69061032
| author | iuc |
|---|---|
| date | Sat, 19 May 2018 03:55:30 -0400 |
| parents | 47267a044ef1 |
| children | 5707d3c668d6 |
comparison
equal
deleted
inserted
replaced
| 26:47267a044ef1 | 27:dc6bc19f05ab |
|---|---|
| 6 # This argument is required: | 6 # This argument is required: |
| 7 # | 7 # |
| 8 # 'factors' a JSON list object from Galaxy | 8 # 'factors' a JSON list object from Galaxy |
| 9 # | 9 # |
| 10 # the output file has columns: | 10 # the output file has columns: |
| 11 # | 11 # |
| 12 # baseMean (mean normalized count) | 12 # baseMean (mean normalized count) |
| 13 # log2FoldChange (by default a moderated LFC estimate) | 13 # log2FoldChange (by default a moderated LFC estimate) |
| 14 # lfcSE (the standard error) | 14 # lfcSE (the standard error) |
| 15 # stat (the Wald statistic) | 15 # stat (the Wald statistic) |
| 16 # pvalue (p-value from comparison of Wald statistic to a standard Normal) | 16 # pvalue (p-value from comparison of Wald statistic to a standard Normal) |
| 17 # padj (adjusted p-value, Benjamini Hochberg correction on genes which pass the mean count filter) | 17 # padj (adjusted p-value, Benjamini Hochberg correction on genes which pass the mean count filter) |
| 18 # | 18 # |
| 19 # the first variable in 'factors' will be the primary factor. | 19 # the first variable in 'factors' will be the primary factor. |
| 20 # the levels of the primary factor are used in the order of appearance in factors. | 20 # the levels of the primary factor are used in the order of appearance in factors. |
| 21 # | 21 # |
| 22 # by default, levels in the order A,B,C produces a single comparison of B vs A, to a single file 'outfile' | 22 # by default, levels in the order A,B,C produces a single comparison of B vs A, to a single file 'outfile' |
| 23 # | 23 # |
| 46 spec <- matrix(c( | 46 spec <- matrix(c( |
| 47 "quiet", "q", 0, "logical", | 47 "quiet", "q", 0, "logical", |
| 48 "help", "h", 0, "logical", | 48 "help", "h", 0, "logical", |
| 49 "outfile", "o", 1, "character", | 49 "outfile", "o", 1, "character", |
| 50 "countsfile", "n", 1, "character", | 50 "countsfile", "n", 1, "character", |
| 51 "header", "H", 0, "logical", | |
| 51 "factors", "f", 1, "character", | 52 "factors", "f", 1, "character", |
| 52 "files_to_labels", "l", 1, "character", | 53 "files_to_labels", "l", 1, "character", |
| 53 "plots" , "p", 1, "character", | 54 "plots" , "p", 1, "character", |
| 54 "tximport", "i", 0, "logical", | 55 "tximport", "i", 0, "logical", |
| 55 "txtype", "y", 1, "character", | 56 "txtype", "y", 1, "character", |
| 82 | 83 |
| 83 verbose <- if (is.null(opt$quiet)) { | 84 verbose <- if (is.null(opt$quiet)) { |
| 84 TRUE | 85 TRUE |
| 85 } else { | 86 } else { |
| 86 FALSE | 87 FALSE |
| 88 } | |
| 89 | |
| 90 if (!is.null(opt$header)) { | |
| 91 hasHeader <- TRUE | |
| 92 } else { | |
| 93 hasHeader <- FALSE | |
| 87 } | 94 } |
| 88 | 95 |
| 89 if (!is.null(opt$tximport)) { | 96 if (!is.null(opt$tximport)) { |
| 90 if (is.null(opt$tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport") | 97 if (is.null(opt$tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport") |
| 91 if (tolower(file_ext(opt$tx2gene)) == "gtf") { | 98 if (tolower(file_ext(opt$tx2gene)) == "gtf") { |
| 136 rownames(sampleTable) <- labs | 143 rownames(sampleTable) <- labs |
| 137 | 144 |
| 138 primaryFactor <- factors[1] | 145 primaryFactor <- factors[1] |
| 139 designFormula <- as.formula(paste("~", paste(rev(factors), collapse=" + "))) | 146 designFormula <- as.formula(paste("~", paste(rev(factors), collapse=" + "))) |
| 140 | 147 |
| 141 if (verbose) { | |
| 142 cat("DESeq2 run information\n\n") | |
| 143 cat("sample table:\n") | |
| 144 print(sampleTable[,-c(1:2),drop=FALSE]) | |
| 145 cat("\ndesign formula:\n") | |
| 146 print(designFormula) | |
| 147 cat("\n\n") | |
| 148 } | |
| 149 | |
| 150 # these are plots which are made once for each analysis | 148 # these are plots which are made once for each analysis |
| 151 generateGenericPlots <- function(dds, factors) { | 149 generateGenericPlots <- function(dds, factors) { |
| 152 library("ggplot2") | 150 library("ggplot2") |
| 153 library("ggrepel") | 151 library("ggrepel") |
| 154 library("pheatmap") | 152 library("pheatmap") |
| 200 } | 198 } |
| 201 | 199 |
| 202 # For JSON input from Galaxy, path is absolute | 200 # For JSON input from Galaxy, path is absolute |
| 203 dir <- "" | 201 dir <- "" |
| 204 | 202 |
| 205 if (!useTXI) { | 203 if (!useTXI & hasHeader) { |
| 204 countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)}) | |
| 205 tbl <- do.call("cbind", countfiles) | |
| 206 rownames(sampleTable) <- colnames(tbl) # take sample ids from header | |
| 207 | |
| 208 # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) | |
| 209 oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual", | |
| 210 "not_aligned", "alignment_not_unique") | |
| 211 specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames | |
| 212 tbl <- tbl[!specialRows, , drop = FALSE] | |
| 213 | |
| 214 dds <- DESeqDataSetFromMatrix(countData = tbl, | |
| 215 colData = sampleTable[,-c(1:2), drop=FALSE], | |
| 216 design = designFormula) | |
| 217 } else if (!useTXI & !hasHeader) { | |
| 218 | |
| 206 # construct the object from HTSeq files | 219 # construct the object from HTSeq files |
| 207 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, | 220 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, |
| 208 directory = dir, | 221 directory = dir, |
| 209 design = designFormula) | 222 design = designFormula) |
| 210 labs <- unname(unlist(filenames_to_labels[colnames(dds)])) | 223 labs <- unname(unlist(filenames_to_labels[colnames(dds)])) |
| 211 colnames(dds) <- labs | 224 colnames(dds) <- labs |
| 225 | |
| 212 } else { | 226 } else { |
| 213 # construct the object using tximport | 227 # construct the object using tximport |
| 214 # first need to make the tx2gene table | 228 # first need to make the tx2gene table |
| 215 # this takes ~2-3 minutes using Bioconductor functions | 229 # this takes ~2-3 minutes using Bioconductor functions |
| 216 if (!is.null(gtfFile)) { | 230 if (!is.null(gtfFile)) { |
| 230 dds <- DESeqDataSetFromTximport(txi, | 244 dds <- DESeqDataSetFromTximport(txi, |
| 231 sampleTable[,3:ncol(sampleTable),drop=FALSE], | 245 sampleTable[,3:ncol(sampleTable),drop=FALSE], |
| 232 designFormula) | 246 designFormula) |
| 233 } | 247 } |
| 234 | 248 |
| 235 if (verbose) cat(paste(ncol(dds), "samples with counts over", nrow(dds), "genes\n")) | 249 if (verbose) { |
| 250 cat("DESeq2 run information\n\n") | |
| 251 cat("sample table:\n") | |
| 252 print(sampleTable[,-c(1:2),drop=FALSE]) | |
| 253 cat("\ndesign formula:\n") | |
| 254 print(designFormula) | |
| 255 cat("\n\n") | |
| 256 cat(paste(ncol(dds), "samples with counts over", nrow(dds), "genes\n")) | |
| 257 } | |
| 258 | |
| 236 # optional outlier behavior | 259 # optional outlier behavior |
| 237 if (is.null(opt$outlier_replace_off)) { | 260 if (is.null(opt$outlier_replace_off)) { |
| 238 minRep <- 7 | 261 minRep <- 7 |
| 239 } else { | 262 } else { |
| 240 minRep <- Inf | 263 minRep <- Inf |
| 241 if (verbose) cat("outlier replacement off\n") | 264 if (verbose) cat("outlier replacement off\n") |
| 242 } | 265 } |
| 243 if (is.null(opt$outlier_filter_off)) { | 266 if (is.null(opt$outlier_filter_off)) { |
| 244 cooksCutoff <- TRUE | 267 cooksCutoff <- TRUE |
| 245 } else { | 268 } else { |
| 246 cooksCutoff <- FALSE | 269 cooksCutoff <- FALSE |
| 247 if (verbose) cat("outlier filtering off\n") | 270 if (verbose) cat("outlier filtering off\n") |
| 248 } | 271 } |
| 249 | 272 |
| 250 # optional automatic mean filtering | 273 # optional automatic mean filtering |
