annotate topGO_enrichment.R @ 24:6d946862a105 draft default tip

"planemo upload commit bcb889be3548adf81c6d163c27a0d9afc71b8e05-dirty"
author proteore
date Mon, 17 May 2021 12:44:00 +0000
parents 537a0aae9b41
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
24
6d946862a105 "planemo upload commit bcb889be3548adf81c6d163c27a0d9afc71b8e05-dirty"
proteore
parents: 20
diff changeset
1 options(warn = -1) #TURN OFF WARNINGS !!!!!!
10
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
2
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
3 suppressMessages(library(ggplot2))
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
4 suppressMessages(library(topGO))
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
5
24
6d946862a105 "planemo upload commit bcb889be3548adf81c6d163c27a0d9afc71b8e05-dirty"
proteore
parents: 20
diff changeset
get_args <- function() {
  # Collect the command-line arguments of the script and return them as a
  # named list: for each "--key=value" argument, list element "key" holds the
  # character string "value".
  #
  # Prints the help text and quits when no argument or "--help" is given.
  #
  # Only the FIRST "=" separates key from value, so values that themselves
  # contain "=" (paths, expressions, ...) are kept intact. An argument without
  # any "=" is stored under its own name with its own name as value, which is
  # what the previous rbind-based parsing produced for such arguments when
  # mixed with regular "--key=value" ones.

  ## Collect arguments
  args <- commandArgs(TRUE)

  ## Default setting when no arguments passed
  if (length(args) < 1) {
    args <- c("--help")
  }

  ## Help section
  if ("--help" %in% args) {
    cat("topGO enrichment R script
    Arguments:
        --help                  Print this text
        --input_type
        --onto
        --option
        --correction
        --threshold
        --text
        --plot
        --column
        --geneuniverse
        --header

    Example:
        Rscript --vanilla enrichment_v3.R --inputtype=tabfile (or copypaste)
        --input=file.txt --ontology='BP/CC/MF' --option=option
        (e.g : classic/elim...) --threshold=threshold --correction=correction
        --textoutput=text --barplotoutput=barplot --dotplotoutput=dotplot
        --column=column --geneuniver=human \n\n")

    q(save = "no")
  }

  # Drop the leading "--" and locate the first "=" of every argument
  # (regexpr returns -1 when there is none).
  stripped <- sub("^--", "", args)
  eq_pos <- as.integer(regexpr("=", stripped, fixed = TRUE))
  has_value <- eq_pos > 0

  keys <- ifelse(has_value, substr(stripped, 1, eq_pos - 1), stripped)
  vals <- ifelse(has_value,
                 substr(stripped, eq_pos + 1, nchar(stripped)),
                 stripped)

  parsed <- as.list(as.character(vals))
  names(parsed) <- keys

  return(parsed)
}
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
49
24
6d946862a105 "planemo upload commit bcb889be3548adf81c6d163c27a0d9afc71b8e05-dirty"
proteore
parents: 20
diff changeset
read_file <- function(path, header) {
  # Load a tab-separated table from `path`.
  #   path   : file to read
  #   header : passed to read.csv(); TRUE when the first line holds column names
  # Returns the data.frame (strings kept as character, column names left
  # untouched). Any failure while reading is reported as "File not found !".
  table_or_error <- try(
    read.csv(
      path,
      header = header,
      sep = "\t",
      quote = "\"",
      stringsAsFactors = FALSE,
      check.names = FALSE
    ),
    silent = TRUE
  )

  if (!inherits(table_or_error, "try-error")) {
    return(table_or_error)
  }
  stop("File not found !")
}
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
60
24
6d946862a105 "planemo upload commit bcb889be3548adf81c6d163c27a0d9afc71b8e05-dirty"
proteore
parents: 20
diff changeset
get_list_from_cp <- function(list) {
  # Turn a copy/pasted string of identifiers into a character vector.
  # Identifiers may be separated by ";", spaces, tabs or newlines; empty
  # entries are discarded and everything from the first "-" onwards is cut
  # off (isoform accession suffix, e.g. "-2").
  tokens <- strsplit(gsub(";", " ", list), "[ \t\n]+")[[1]]
  tokens <- tokens[tokens != ""]
  # "-.+" is greedy up to the end of the token, so one substitution per
  # token removes the whole suffix.
  sub("-.+", "", tokens)
}
ddcc0347c54a planemo upload commit 76a36ad5001b9d90c680ff389c7ab7187a790275-dirty
proteore
parents: 10
diff changeset
68
10
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
check_ens_ids <- function(vector) {
  # Test every element of `vector` against the accepted gene identifier
  # formats and return a logical vector of the same length.
  #
  # The pattern is assembled with paste0() so that it can be wrapped over
  # several source lines WITHOUT the line break and indentation ending up
  # inside the regular expression (a multi-line string literal would make the
  # second alternative require a trailing newline and never match).
  ens_pattern <- paste0(
    "^(",
    "ENS[A-Z]+[0-9]{11}",
    "|[A-Z]{3}[0-9]{3}[A-Za-z](-[A-Za-z])?",
    "|CG[0-9]+",
    "|[A-Z0-9]+\\.[0-9]+",
    "|YM[A-Z][0-9]{3}[a-z][0-9]",
    ")$"
  )
  return(grepl(ens_pattern, vector))
}
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
75
24
6d946862a105 "planemo upload commit bcb889be3548adf81c6d163c27a0d9afc71b8e05-dirty"
proteore
parents: 20
diff changeset
str2bool <- function(x) {
  # Interpret a textual flag, case-insensitively:
  #   "t" / "true"  -> TRUE
  #   "f" / "false" -> FALSE
  #   anything else -> NULL
  # The "true" spellings are checked first.
  lowered <- tolower(x)
  if (any(c("t", "true") %in% lowered)) {
    return(TRUE)
  }
  if (any(c("f", "false") %in% lowered)) {
    return(FALSE)
  }
  NULL
}
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
85
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
# topGO can report p-values as text such as "< 1e-30", which some libraries
# (e.g. GOsummaries) cannot handle. deleteinfchar strips the "<" character
# from every entry that contains one and leaves all other entries alone.
# The unmodified values remain available to the user in the text output.
deleteinfchar <- function(values) {
  flagged <- grep("<", values)
  # Only touch the vector when something matched, so that inputs without any
  # "<" are returned exactly as they came in.
  if (length(flagged) > 0) {
    values[flagged] <- gsub("<", "", values[flagged])
  }
  values
}
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
100
24
6d946862a105 "planemo upload commit bcb889be3548adf81c6d163c27a0d9afc71b8e05-dirty"
proteore
parents: 20
diff changeset
101 #nolint start
6d946862a105 "planemo upload commit bcb889be3548adf81c6d163c27a0d9afc71b8e05-dirty"
proteore
parents: 20
diff changeset
corrMultipleTesting <- function(result, mygodata, correction, threshold) {
  # Build the table of GO terms whose p-value is <= `threshold`.
  #   result     : topGO test result; its "score" attribute holds the p-values
  #   mygodata   : topGO data object handed to GenTable()
  #   correction : "none", or a method name accepted by p.adjust()
  #   threshold  : p-value cut-off
  # Returns a data.frame with the "test" column renamed to "pvalues" (plus a
  # numeric "qvalues" column when a correction is requested), ordered by
  # q-value (correction) or p-value (no correction). Returns NULL, after
  # printing a message, when no term passes the threshold.
  #
  # GenTable() reports p-values as text and may produce entries such as
  # "< 1e-30". Those are converted with deleteinfchar() before ANY numeric use
  # (adjustment, filtering, ordering); converting the raw text directly would
  # turn them into NA and silently discard the most significant terms. The
  # "pvalues" column itself keeps the original text.
  no_hit_msg <- paste(
    "Threshold was too stringent, no GO term found with pvalue",
    "equal or lesser than the threshold value"
  )

  # NOTE(review): command-line values arrive as character strings; coercing
  # here keeps the comparisons below numeric. No effect on numeric input.
  threshold <- as.numeric(threshold)
  scores <- attributes(result)$score

  if (correction != "none") {
    # Take all the GO terms, whatever their p-value, since the correction has
    # to be computed over the complete set of tests.
    allRes <- GenTable(mygodata, test = result, orderBy = "result",
                       ranksOf = "result", topNodes = length(scores))

    pvaluestmp <- as.numeric(deleteinfchar(allRes$test))

    # the correction is done from the cleaned p-values
    allRes$qvalues <- p.adjust(pvaluestmp, method = as.character(correction),
                               n = length(pvaluestmp))
    allRes <- as.data.frame(allRes)

    # Rename the test column to pvalues, which is more explicit
    names(allRes)[names(allRes) %in% c("test")] <- "pvalues"

    # Filter on the cleaned numeric p-values, not on the raw text
    allRes <- allRes[which(pvaluestmp <= threshold), ]
    if (nrow(allRes) == 0) {
      print(no_hit_msg)
      return(NULL)
    }
    allRes <- allRes[order(allRes$qvalues), ]
  } else {
    # Number of GO terms under the user threshold. sum() is well defined even
    # when every score (or none) passes, unlike indexing summary(<logical>).
    numsignif <- sum(scores <= threshold, na.rm = TRUE)
    if (numsignif == 0) {
      print(no_hit_msg)
      return(NULL)
    }

    # get all significant nodes
    allRes <- GenTable(mygodata, test = result, orderBy = "result",
                       ranksOf = "result", topNodes = numsignif)
    allRes <- as.data.frame(allRes)

    # Rename the test column to pvalues, which is more explicit
    names(allRes)[names(allRes) %in% c("test")] <- "pvalues"

    # Order numerically; ordering the text would be lexicographic
    allRes <- allRes[order(as.numeric(deleteinfchar(allRes$pvalues))), ]
  }

  return(allRes)
}
24
6d946862a105 "planemo upload commit bcb889be3548adf81c6d163c27a0d9afc71b8e05-dirty"
proteore
parents: 20
diff changeset
160 #nolint end
10
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
161
24
6d946862a105 "planemo upload commit bcb889be3548adf81c6d163c27a0d9afc71b8e05-dirty"
proteore
parents: 20
diff changeset
# roundvalues simplifies values written in scientific notation by replacing
# everything up to the last "e" with "1e", i.e. only the order of magnitude is
# kept: 1.1e-17 becomes 1e-17. Values without an "e" are simply converted
# with as.numeric(). The result is written back into `values`, so a character
# vector stays a character vector.
roundvalues <- function(values) {
  if (length(values) == 0) {
    return(values)
  }
  simplified <- as.numeric(gsub(".*e", "1e", as.character(values)))
  values[seq_along(values)] <- simplified
  values
}
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
170
24
6d946862a105 "planemo upload commit bcb889be3548adf81c6d163c27a0d9afc71b8e05-dirty"
proteore
parents: 20
diff changeset
171 #nolint start
6d946862a105 "planemo upload commit bcb889be3548adf81c6d163c27a0d9afc71b8e05-dirty"
proteore
parents: 20
diff changeset
createDotPlot <- function(data, onto) {
  # Draw a dot plot of the enrichment table `data` and save it as
  # "dotplot.png" (15 x 15 cm, 320 dpi) in the working directory.
  #   data : data.frame with columns pvalues, Significant, Annotated, Term
  #   onto : label appended to the "GO terms" axis title
  # x = Significant / Annotated, y = term, colour = simplified p-value,
  # point size = Significant.

  # p-values: strip "<", keep the order of magnitude only, make numeric
  values <- as.numeric(roundvalues(deleteinfchar(data$pvalues)))

  # The names used inside aes() are kept as they are: ggplot2 derives the
  # default legend titles from them.
  count <- data$Significant
  geneRatio <- count / data$Annotated
  goTerms <- data$Term

  axis_title <- paste("GO terms", onto, sep = " ")

  dot_plot <- ggplot(data, aes(x = geneRatio, y = goTerms, color = values,
                               size = count)) +
    geom_point() +
    scale_colour_gradientn(colours = c("red", "violet", "blue")) +
    xlab("Gene Ratio") +
    ylab(axis_title) +
    labs(color = "p-values\n")

  ggsave("dotplot.png", plot = dot_plot, device = "png", dpi = 320,
         limitsize = TRUE, width = 15, height = 15, units = "cm")
}
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
187
24
6d946862a105 "planemo upload commit bcb889be3548adf81c6d163c27a0d9afc71b8e05-dirty"
proteore
parents: 20
diff changeset
createBarPlot <- function(data, onto) {
  # Draw a horizontal bar plot of the enrichment table `data` and save it as
  # "barplot.png" (15 x 15 cm, 320 dpi) in the working directory.
  #   data : data.frame with columns pvalues, Significant, Term
  #   onto : label appended to the "GO terms" axis title
  # One bar per term, bar length = Significant, fill = simplified p-value.

  # p-values: strip "<", keep the order of magnitude only, make numeric
  values <- as.numeric(roundvalues(deleteinfchar(data$pvalues)))

  # The names used inside aes() are kept as they are: ggplot2 derives the
  # default legend titles from them.
  goTerms <- data$Term
  count <- data$Significant

  axis_title <- paste("GO terms", onto, sep = " ")

  # NOTE(review): the unnamed `scale(scale = 0.5)` aesthetic is carried over
  # unchanged from the previous implementation; confirm whether it is needed.
  bar_plot <- ggplot(data, aes(x = goTerms, y = count, fill = values,
                               scale(scale = 0.5))) +
    ylab("Gene count") +
    xlab(axis_title) +
    geom_bar(stat = "identity") +
    scale_fill_gradientn(colours = c("red", "violet", "blue")) +
    coord_flip() +
    labs(fill = "p-values\n")

  ggsave("barplot.png", plot = bar_plot, device = "png", dpi = 320,
         limitsize = TRUE, width = 15, height = 15, units = "cm")
}
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
202
24
6d946862a105 "planemo upload commit bcb889be3548adf81c6d163c27a0d9afc71b8e05-dirty"
proteore
parents: 20
diff changeset
203 #nolint end
6d946862a105 "planemo upload commit bcb889be3548adf81c6d163c27a0d9afc71b8e05-dirty"
proteore
parents: 20
diff changeset
204
10
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
# Produce the different outputs.
#
# Writes either the enrichment table or an explanatory message to
# "result.tsv" in the working directory, then draws the requested plots.
#
# Arguments:
#   result     : enrichment test result, or NULL when the analysis could not
#                be run (the message written then says that no input id was
#                found in the org package data).
#   cut_result : table of GO terms to report, or NULL when no term passed the
#                p-value threshold.
#   text       : not used by this function; the table/message is always
#                written.
#   barplot    : if TRUE, createBarPlot(cut_result, onto) is called.
#   dotplot    : if TRUE, createDotPlot(cut_result, onto) is called.
#   onto       : ontology identifier forwarded to the plot functions.
createoutputs <- function(result, cut_result, text, barplot, dotplot, onto) {

  # Every branch goes through this helper so that they all target the same
  # file with the same formatting options.
  write_result <- function(content, with_header) {
    write.table(content, file = "result.tsv", quote = FALSE, sep = "\t",
                col.names = with_header, row.names = FALSE)
  }

  if (is.null(result)) {
    # Built with paste0() so that source-code line wrapping does not leak
    # extra line breaks and indentation into the written message.
    err_msg <- paste0(
      "None of the input ids can be found in the org package data, ",
      "enrichment analysis cannot be realized. \n The inputs ids probably ",
      "either have no associated GO terms or are not ENSG identifiers ",
      "(e.g : ENSG00000012048)."
    )
    write_result(err_msg, FALSE)
  } else if (is.null(cut_result)) {
    err_msg <- paste0(
      "Threshold was too stringent, no GO term found with pvalue equal ",
      "or lesser than the threshold value."
    )
    write_result(err_msg, FALSE)
  } else {
    write_result(cut_result, TRUE)

    if (barplot) {
      createBarPlot(cut_result, onto) #nolint
    }
    if (dotplot) {
      createDotPlot(cut_result, onto) #nolint
    }
  }
}
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
232
24
6d946862a105 "planemo upload commit bcb889be3548adf81c6d163c27a0d9afc71b8e05-dirty"
proteore
parents: 20
diff changeset
# Launch enrichment analysis and return result data from the analysis or the
# null object if the enrichment could not be done.
#
# Arguments:
#   geneuniverse      : value passed as `mapping` to annFUN.org (name of the
#                       org annotation package).
#   sample            : character vector of tested gene ids; duplicated ids
#                       count only once.
#   background_sample : character vector of background gene ids, or NULL to
#                       use every gene annotated in the chosen ontology.
#   onto              : ontology of interest (BP, CC, MF).
#   algorithm         : topGO algorithm passed to runTest(). Defaults to the
#                       script-level `option` variable, which is what this
#                       function used implicitly before the argument existed.
#
# Returns NULL when no id of `sample` belongs to the gene universe, otherwise
# a list whose first element is the runTest() result and whose second element
# is the topGOdata object.
# Stops when every gene of the universe is in `sample` (a single factor
# level), since no comparison is possible then.
goenrichment <- function(geneuniverse, sample, background_sample, onto,
                         algorithm = option) {

  if (is.null(background_sample)) {
    # Get all the GO terms of the corresponding ontology (BP/CC/MF) and all
    # their associated ensembl ids according to the org package.
    xx <- annFUN.org(onto, mapping = geneuniverse, ID = "ensembl") #nolint
    all_genes <- unique(unlist(xx))
  } else {
    # De-duplicate so that a repeated background id is not counted twice and
    # the names of the gene factor below stay unique.
    all_genes <- unique(background_sample)
  }

  # Check that at least one gene given by the user belongs to the universe.
  if (length(intersect(sample, all_genes)) == 0) {
    print(paste0(
      "None of the input ids can be found in the org package data, ",
      "enrichment analysis cannot be realized. \n The inputs ids probably ",
      "have no associated GO terms."
    ))
    return(NULL)
  }

  # 1 = gene of interest, 0 = other gene of the universe.
  gene_list <- factor(as.integer(all_genes %in% sample))
  if (length(levels(gene_list)) == 1) {
    stop("All or none of the background genes are found in tested genes ",
         "dataset, enrichment analysis can't be done")
  }
  names(gene_list) <- all_genes

  # topGO enrichment

  # Creation of a topGOdata object
  # It will contain : the list of genes of interest, the GO annotations and
  # the GO hierarchy
  # Parameters :
  # ontology : character string specifying the ontology of interest (BP, CC, MF)
  # allGenes : named vector of type numeric or factor
  # annot : tells topGO how to map genes to GO annotations.
  # argument not used here : nodeSize
  mygodata <- new("topGOdata", description = "SEA with TopGO", ontology = onto,
                  allGenes = gene_list, annot = annFUN.org,
                  mapping = geneuniverse, ID = "ensembl")

  # Performing enrichment tests
  result <- runTest(mygodata, algorithm = algorithm, statistic = "fisher") #nolint

  # Explicit list instead of c() on two S4 objects; callers index it by
  # position (1 = test result, 2 = topGOdata).
  list(result, mygodata)
}
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
288
511b060e9890 planemo upload commit 5733164d8d85e557ef99150193e71221df4ea012-dirty
proteore
parents:
diff changeset
# ---- Main script ----
# Read the tool arguments, load the tested (and optional background) gene
# ids, run the enrichment, filter it and write the outputs.

args <- get_args()

input_type <- args$inputtype
input <- args$input
onto <- args$ontology
option <- args$option # read as a global by goenrichment()
correction <- args$correction
threshold <- as.numeric(args$threshold)
text <- str2bool(args$textoutput)

# args$plot is a comma separated list of the plots to draw.
requested_plots <- unlist(strsplit(args$plot, ","))
barplot <- "barplot" %in% requested_plots
dotplot <- "dotplot" %in% requested_plots

# Column selectors look like "c3"; keep the numeric index only.
column <- as.numeric(gsub("c", "", args$column))
geneuniverse <- args$geneuniverse
header <- str2bool(args$header)
background <- str2bool(args$background)
if (background) {
  background_genes <- args$background_genes
  background_input_type <- args$background_input_type
  background_header <- str2bool(args$background_header)
  background_column <- as.numeric(gsub("c", "", args$background_column))
}

# Get input
if (input_type == "copy_paste") {
  sample <- get_list_from_cp(input)
} else if (input_type == "file") {
  tab <- read_file(input, header)
  # A cell may hold several ";"-separated ids; as.character() guards against
  # a non-character column, which strsplit() rejects.
  sample <- trimws(unlist(strsplit(as.character(tab[, column]), ";")))
} else {
  # Without this guard `sample` would silently resolve to base::sample().
  stop("unknown input type '", input_type,
       "', expected 'copy_paste' or 'file'")
}

# Check of ENS ids
if (!any(check_ens_ids(sample))) {
  stop("no ensembl gene ids found in your ids list, ",
       "please check your IDs in input or the selected column of your ",
       "input file")
}

# Get input if background genes
if (background) {
  if (background_input_type == "copy_paste") {
    background_sample <- get_list_from_cp(background_genes)
  } else if (background_input_type == "file") {
    background_tab <- read_file(background_genes, background_header)
    background_sample <- unique(trimws(unlist(
      strsplit(as.character(background_tab[, background_column]), ";"))))
  } else {
    stop("unknown background input type '", background_input_type,
         "', expected 'copy_paste' or 'file'")
  }
  # Check of ENS ids
  if (!any(check_ens_ids(background_sample))) {
    stop("no ensembl gene ids found in your background ids list, ",
         "please check your IDs in input or the selected column of your ",
         "input file")
  }
} else {
  background_sample <- NULL
}

# Launch enrichment analysis
allresult <- suppressMessages(goenrichment(geneuniverse, sample,
                                           background_sample, onto))

# goenrichment() returns NULL when the analysis could not be run, otherwise
# the test result followed by the topGOdata object.
if (is.null(allresult)) {
  result <- NULL
  mygodata <- NULL
} else {
  result <- allresult[[1]]
  mygodata <- allresult[[2]]
}

if (!is.null(result)) {
  # Adjust the result with a multiple testing correction or not and with the
  # user p-value cutoff.
  cut_result <- corrMultipleTesting(result, mygodata, correction, threshold)
} else {
  cut_result <- NULL
}

createoutputs(result, cut_result, text, barplot, dotplot, onto)