Mercurial > repos > iuc > ruvseq
comparison get_deseq_dataset.R @ 2:a32619eef8b9 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ruvseq commit 4daa375d022673d2437d609b1865b78c64b04415"
| author | iuc |
|---|---|
| date | Fri, 15 Jan 2021 17:52:45 +0000 |
| parents | 99e0c629d3a2 |
| children | 557362ddaf9e |
comparison
equal
deleted
inserted
replaced
| 1:99e0c629d3a2 | 2:a32619eef8b9 |
|---|---|
| 1 get_deseq_dataset <- function(sampleTable, header, designFormula, tximport, txtype, tx2gene) { | 1 get_deseq_dataset <- function(sample_table, header, design_formula, tximport, txtype, tx2gene) { |
| 2 | 2 |
| 3 dir <- "" | 3 dir <- "" |
| 4 | 4 |
| 5 if (!is.null(header)) { | 5 has_header <- !is.null(header) |
| 6 hasHeader <- TRUE | 6 use_txi <- !is.null(tximport) |
| 7 } else { | 7 if (use_txi) { |
| 8 hasHeader <- FALSE | 8 if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport") |
| 9 if (tolower(file_ext(tx2gene)) == "gff") { | |
| 10 gff_file <- tx2gene | |
| 11 } else { | |
| 12 gff_file <- NULL | |
| 13 tx2gene <- read.table(tx2gene, header = has_header) | |
| 14 } | |
| 9 } | 15 } |
| 10 | 16 |
| 11 if (!is.null(tximport)) { | 17 if (!use_txi & has_header) { |
| 12 if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport") | 18 countfiles <- lapply(as.character(sample_table$filename), read.delim, row.names = 1) |
| 13 if (tolower(file_ext(tx2gene)) == "gff") { | |
| 14 gffFile <-tx2gene | |
| 15 } else { | |
| 16 gffFile <- NULL | |
| 17 tx2gene <- read.table(tx2gene, header=hasHeader) | |
| 18 } | |
| 19 useTXI <- TRUE | |
| 20 } else { | |
| 21 useTXI <- FALSE | |
| 22 } | |
| 23 | |
| 24 if (!useTXI & hasHeader) { | |
| 25 countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)}) | |
| 26 tbl <- do.call("cbind", countfiles) | 19 tbl <- do.call("cbind", countfiles) |
| 27 colnames(tbl) <- rownames(sampleTable) # take sample ids from header | 20 colnames(tbl) <- rownames(sample_table) # take sample ids from header |
| 28 | 21 |
| 29 # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) | 22 # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) |
| 30 oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual", | 23 old_special_names <- c( |
| 31 "not_aligned", "alignment_not_unique") | 24 "no_feature", |
| 32 specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames | 25 "ambiguous", |
| 33 tbl <- tbl[!specialRows, , drop = FALSE] | 26 "too_low_aQual", |
| 27 "not_aligned", | |
| 28 "alignment_not_unique" | |
| 29 ) | |
| 30 special_rows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% old_special_names | |
| 31 tbl <- tbl[!special_rows, , drop = FALSE] | |
| 34 | 32 |
| 35 dds <- DESeqDataSetFromMatrix(countData = tbl, | 33 dds <- DESeqDataSetFromMatrix( |
| 36 colData = subset(sampleTable, select=-(filename)), | 34 countData = tbl, |
| 37 design = designFormula) | 35 colData = subset(sample_table, select = -filename), |
| 38 } else if (!useTXI & !hasHeader) { | 36 design = design_formula |
| 37 ) | |
| 38 } else if (!use_txi & !has_header) { | |
| 39 | 39 |
| 40 # construct the object from HTSeq files | 40 # construct the object from HTSeq files |
| 41 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, | 41 dds <- DESeqDataSetFromHTSeqCount( |
| 42 directory = dir, | 42 sampleTable = sample_table, |
| 43 design = designFormula) | 43 directory = dir, |
| 44 colnames(dds) <- row.names(sampleTable) | 44 design = design_formula |
| 45 ) | |
| 46 colnames(dds) <- row.names(sample_table) | |
| 45 | 47 |
| 46 } else { | 48 } else { |
| 47 # construct the object using tximport | 49 # construct the object using tximport |
| 48 library("tximport") | 50 library("tximport") |
| 49 txiFiles <- as.character(sampleTable$filename) | 51 txi_files <- as.character(sample_table$filename) |
| 50 labs <- row.names(sampleTable) | 52 labs <- row.names(sample_table) |
| 51 names(txiFiles) <- labs | 53 names(txi_files) <- labs |
| 52 if (!is.null(gffFile)) { | 54 if (!is.null(gff_file)) { |
| 53 # first need to make the tx2gene table | 55 # first need to make the tx2gene table |
| 54 # this takes ~2-3 minutes using Bioconductor functions | 56 # this takes ~2-3 minutes using Bioconductor functions |
| 55 suppressPackageStartupMessages({ | 57 suppressPackageStartupMessages({ |
| 56 library("GenomicFeatures") | 58 library("GenomicFeatures") |
| 57 }) | 59 }) |
| 58 txdb <- makeTxDbFromGFF(gffFile) | 60 txdb <- makeTxDbFromGFF(gff_file) |
| 59 k <- keys(txdb, keytype = "TXNAME") | 61 k <- keys(txdb, keytype = "TXNAME") |
| 60 tx2gene <- select(txdb, keys=k, columns="GENEID", keytype="TXNAME") | 62 tx2gene <- select(txdb, keys = k, columns = "GENEID", keytype = "TXNAME") |
| 61 # Remove 'transcript:' from transcript IDs (when gffFile is a GFF3 from Ensembl and the transcript does not have a Name) | 63 # Remove 'transcript:' from transcript IDs (when gff_file is a GFF3 from Ensembl and the transcript does not have a Name) |
| 62 tx2gene$TXNAME <- sub('^transcript:', '', tx2gene$TXNAME) | 64 tx2gene$TXNAME <- sub("^transcript:", "", tx2gene$TXNAME) # nolint |
| 63 } | 65 } |
| 64 try(txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)) | 66 try(txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene)) |
| 65 if (!exists("txi")) { | 67 if (!exists("txi")) { |
| 66 # Remove version from transcript IDs in tx2gene... | 68 # Remove version from transcript IDs in tx2gene... |
| 67 tx2gene$TXNAME <- sub('\\.[0-9]+$', '', tx2gene$TXNAME) | 69 tx2gene$TXNAME <- sub("\\.[0-9]+$", "", tx2gene$TXNAME) # nolint |
| 68 # ...and in txiFiles | 70 # ...and in txi_files |
| 69 txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene, ignoreTxVersion=TRUE) | 71 txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene, ignoreTxVersion = TRUE) |
| 70 } | 72 } |
| 71 dds <- DESeqDataSetFromTximport(txi, | 73 dds <- DESeqDataSetFromTximport( |
| 72 subset(sampleTable, select=-c(filename)), | 74 txi, |
| 73 designFormula) | 75 subset(sample_table, select = -c(filename)), |
| 76 design_formula | |
| 77 ) | |
| 74 } | 78 } |
| 75 return(dds) | 79 return(dds) |
| 76 } | 80 } |
