annotate enrichment_v3.R @ 3:2f9236ac2c1d draft

planemo upload commit 0be58bb700f64de6792a7234a11675bae2755e8f-dirty
author proteore
date Thu, 13 Sep 2018 08:58:15 -0400
parents 5569a3f066cf
children 81d204aac06b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
1 # enrichment_v3.R
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
2 # Usage : Rscript --vanilla enrichment_v3.R --inputtype tabfile (or
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
3 # copypaste) --input file.txt --ontology "BP/CC/MF" --option option (e.g
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
4 # : classic/elim...) --threshold threshold --correction correction --textoutput
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
5 # text --barplotoutput barplot
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
6 # --dotplotoutput dotplot --column column --geneuniverse human --header header
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
7 # e.g : Rscript --vanilla enrichment_v3.R --inputtype tabfile --input file.txt
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
8 # --ontology BP --option classic --threshold 1e-15 --correction holm
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
9 # --textoutput TRUE
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
10 # --barplotoutput TRUE --dotplotoutput TRUE --column c1 --geneuniverse
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
11 # org.Hs.eg.db
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
12 # INPUT :
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
13 # - type of input. Can be ids separated by a blank space (copypaste), or a text
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
14 # file (tabfile)
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
15 # - file with at least one column of ensembl ids
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
16 # - gene ontology category : Biological Process (BP), Cellular Component (CC), Molecular Function (MF)
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
17 # - test option (relative to topGO algorithms) : elim, weight01, parentchild, or no option (classic)
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
18 # - threshold for enriched GO term pvalues (e.g : 1e-15)
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
19 # - correction for multiple testing (see p.adjust options : holm, hochberg, hommel, bonferroni, BH, BY, fdr, none)
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
20 # - outputs wanted in this order text, barplot, dotplot with boolean value (e.g
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
21 # : TRUE TRUE TRUE ).
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
22 # Declare the output not wanted as none
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
23 # - column containing the ensembl ids if the input file is a tabfile
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
24 # - gene universe reference for the user chosen species
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
25 # - header : if the input is a text file, does this text file have a header
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
26 # (TRUE/FALSE)
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
27 #
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
28 # OUTPUT :
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
29 # - outputs commanded by the user named respectively result.tsv for the text
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
30 # results file, barplot.png for the barplot image file and dotplot.png for the
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
31 # dotplot image file
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
32
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
33
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
34 # loading topGO library
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
35 library(topGO)
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
36
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
# Read a tab-separated file and return its content as a data.frame.
#
# Arguments:
#   filename : path of the tab-separated file to read
#   header   : "true" (case-insensitive, or logical TRUE) when the first line
#              of the file holds the column names; any other value means that
#              the file has no header line
#
# Returns a data.frame from which the rows whose cells are all empty/NA have
# been removed. When a header is present its cells become the column names.
readfile = function(filename, header) {
  has.header <- identical(tolower(as.character(header)[1]), "true")

  # Single place for the read.table settings shared by every read below
  read.tsv <- function(...) {
    read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE,
               fill = TRUE, na.strings = c("", "NA"), blank.lines.skip = TRUE,
               quote = "", ...)
  }

  if (has.header) {
    # Read only the first line of the file as header. It is forced to
    # character so that names such as "T", "001" or "1e5" are not altered by
    # read.table's type conversion (TRUE, 1, 1e+05).
    headers <- read.tsv(nrows = 1, colClasses = "character")
    # Read the data of the file (skipping the first row)
    file <- read.tsv(skip = 1)
  } else {
    file <- read.tsv()
  }

  # Remove empty rows
  file <- file[!apply(is.na(file) | file == "", 1, all), , drop = FALSE]

  if (has.header) {
    # And assign the header to the data
    names(file) <- as.character(unlist(headers[1, ], use.names = FALSE))
  }
  return(file)
}
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
56
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
# Tell, for each element of `vector`, whether it matches one of the accepted
# gene id formats listed in the pattern below (anchored on the whole string).
# Returns a logical vector of the same length as `vector`.
check_ens_ids <- function(vector) {
  id.regex <- "^(ENS[A-Z]+[0-9]{11}|[A-Z]{3}[0-9]{3}[A-Za-z](-[A-Za-z])?|CG[0-9]+|[A-Z0-9]+\\.[0-9]+|YM[A-Z][0-9]{3}[a-z][0-9])$"
  is.valid <- grepl(id.regex, vector)
  is.valid
}
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
61
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
# Negated %in% : TRUE for every element of x that is absent from y
`%!in%` <- function(x, y) {
  !(x %in% y)
}
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
63
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
64
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
# Parse command line arguments

args <- commandArgs(trailingOnly = TRUE)

# Split every raw argument on whitespace, so that a quoted list of ids
# ("ID1 ID2 ID3") is handled exactly like ids given as separate arguments.
tokens <- as.character(unlist(strsplit(args, "[ \t\n]+")))
tokens <- tokens[nzchar(tokens)]

# A token starting with "--" opens a new option; the tokens that follow it, up
# to the next "--" token, are the values of that option. Working on tokens
# (rather than splitting the joined command line on "--") keeps values that
# merely contain "--", such as a file path, intact.
is.flag <- grepl("^--", tokens)
group <- cumsum(is.flag)
in.option <- group > 0   # anything before the first option is ignored
options.args <- lapply(split(tokens[in.option], group[in.option]),
                       function(x) x[-1])
names(options.args) <- sub("^--", "", tokens[is.flag])


if (length(options.args) != 12) {
  stop("Not enough/Too many arguments", call. = FALSE)
}

# Options are read by position, in the order given in the usage header
typeinput    <- as.character(options.args[[1]])
listfile     <- as.character(options.args[[2]])
onto         <- as.character(options.args[[3]])
option       <- as.character(options.args[[4]])
threshold    <- as.numeric(options.args[[5]])
correction   <- as.character(options.args[[6]])
text         <- as.character(options.args[[7]])
barplot      <- as.character(options.args[[8]])
dotplot      <- as.character(options.args[[9]])
# the column is given as "c<number>" (e.g. c1)
column       <- as.numeric(gsub("c", "", options.args[[10]]))
geneuniverse <- as.character(options.args[[11]])
header       <- as.character(options.args[[12]])

if (identical(typeinput, "copypaste")) {
  # The ids were given directly on the command line: they form a single
  # column, so the --column value does not apply here.
  sample <- listfile
} else if (identical(typeinput, "tabfile")) {
  has.header <- identical(toupper(header), "TRUE")
  sample <- readfile(listfile, if (has.header) "true" else "false")
  if (length(column) != 1 || is.na(column) || column < 1 || column > ncol(sample)) {
    stop("The selected column does not exist in the input file", call. = FALSE)
  }
  sample <- sample[, column]
} else {
  # Without this guard `sample` would silently refer to base::sample
  stop("Unknown input type, expected 'copypaste' or 'tabfile'", call. = FALSE)
}

#check of ENS ids
if (!any(check_ens_ids(sample))) {
  print("no ensembl gene ids found in your ids list, please check your IDs in input or the selected column of your input file")
  stop("No ensembl gene ids found in the input", call. = FALSE)
}
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
123
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
# Launch enrichment analysis and return result data from the analysis or the null
# object if the enrichment could not be done.
#
# Arguments:
#   geneuniverse : name of the org package used as gene universe, passed as
#                  `mapping` to annFUN.org (e.g. org.Hs.eg.db)
#   sample       : vector of the ids given by the user
#   onto         : ontology of interest (BP, CC or MF)
#   algorithm    : topGO algorithm passed to runTest. When NULL (default) the
#                  script-level `option` variable is used, which is what the
#                  function always did before this argument existed.
#
# Returns NULL when none of the ids is found in the gene universe, otherwise
# the runTest() result combined with the topGOdata object through c(), in that
# order.
goEnrichment = function(geneuniverse, sample, onto, algorithm = NULL) {

  if (is.null(algorithm)) {
    algorithm <- option
  }

  # get all the GO terms of the corresponding ontology (BP/CC/MF) and all their
  # associated ensembl ids according to the org package
  xx <- annFUN.org(onto, mapping = geneuniverse, ID = "ensembl")
  allGenes <- unique(unlist(xx))

  # check if the genes given by the user can be found in the org package (gene
  # universe), that is in allGenes
  if (length(intersect(sample, allGenes)) == 0) {
    print("None of the input ids can be found in the org package data, enrichment analysis cannot be realized. \n The inputs ids probably have no associated GO terms.")
    return(NULL)
  }

  # 1 for the genes of the universe that belong to the user sample, 0 otherwise
  geneList <- factor(as.integer(allGenes %in% sample))
  names(geneList) <- allGenes

  #topGO enrichment

  # Creation of a topGOdata object
  # It will contain : the list of genes of interest, the GO annotations and the GO hierarchy
  # Parameters :
  # ontology : character string specifying the ontology of interest (BP, CC, MF)
  # allGenes : named vector of type numeric or factor
  # annot : tells topGO how to map genes to GO annotations.
  # argument not used here : nodeSize : at which minimal number of GO annotations
  # do we consider a gene
  myGOdata <- new("topGOdata", description = "SEA with TopGO", ontology = onto,
                  allGenes = geneList, annot = annFUN.org,
                  mapping = geneuniverse, ID = "ensembl")

  # Performing enrichment tests
  result <- runTest(myGOdata, algorithm = algorithm, statistic = "fisher")
  return(c(result, myGOdata))
}
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
165
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
# topGO can report p-values as strings such as "< 1e-30", which some libraries
# (e.g. GOsummaries) cannot handle. deleteInfChar strips the "<" character from
# every element that contains one and leaves the other elements untouched.
# The user still has access to the original results in the text output.
deleteInfChar = function(values) {
  has.inf.char <- grepl("<", values)
  if (any(has.inf.char)) {
    values[has.inf.char] <- gsub("<", "", values[has.inf.char])
  }
  values
}
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
179
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
180 corrMultipleTesting = function(result, myGOdata,correction,threshold){
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
181
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
182 # adjust for multiple testing
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
183 if (correction!="none"){
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
184 # GenTable : transforms the result object into a list. Filters can be applied
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
185 # (e.g : with the topNodes argument, to get for instance only the n first
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
186 # GO terms with the lowest pvalues), but as we want to apply a correction we
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
187 # take all the GO terms, no matter their pvalues
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
188 allRes <- GenTable(myGOdata, test = result, orderBy = "result", ranksOf = "result",topNodes=length(attributes(result)$score))
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
189 # Some pvalues given by topGO are not numeric (e.g : "<1e-30). As such, these
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
190 # values are converted to 1e-30 to be able to correct the pvalues
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
191 pvaluestmp = deleteInfChar(allRes$test)
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
192
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
193 # the correction is done from the modified pvalues
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
194 allRes$qvalues = p.adjust(pvaluestmp, method = as.character(correction), n = length(pvaluestmp))
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
195 allRes = as.data.frame(allRes)
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
196
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
197 # Rename the test column by pvalues, so that is more explicit
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
198 nb = which(names(allRes) %in% c("test"))
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
199
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
200 names(allRes)[nb] = "pvalues"
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
201
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
202 allRes = allRes[which(as.numeric(allRes$pvalues) <= threshold),]
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
203 if (length(allRes$pvalues)==0){
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
204 print("Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value")
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
205 return(NULL)
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
206 }
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
207 allRes = allRes[order(allRes$qvalues),]
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
208 }
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
209
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
210 if (correction=="none"){
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
211 # get all the go terms under user threshold
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
212 mysummary <- summary(attributes(result)$score <= threshold)
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
213 numsignif <- as.integer(mysummary[[3]])
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
214 # get all significant nodes
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
215 allRes <- GenTable(myGOdata, test = result, orderBy = "result", ranksOf = "result",topNodes=numsignif)
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
216
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
217
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
218 allRes = as.data.frame(allRes)
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
219 # Rename the test column by pvalues, so that is more explicit
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
220 nb = which(names(allRes) %in% c("test"))
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
221 names(allRes)[nb] = "pvalues"
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
222 if (numsignif==0){
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
223
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
224 print("Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value")
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
225 return(NULL)
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
226 }
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
227
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
228 allRes = allRes[order(allRes$pvalues),]
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
229 }
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
230
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
231 return(allRes)
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
232 }
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
233
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
# roundValues simplifies p-values for display: any value whose character form
# is in scientific notation gets its mantissa collapsed to 1, e.g. 1.1e-17
# becomes 1e-17. Values written without an exponent (e.g. 0.05) are unchanged.
#
# Args:
#   values: vector of p-values (numeric, or character strings holding numbers).
# Returns:
#   A vector of the same length as `values` holding the simplified values.
#   Character input stays character (R coerces on assignment), numeric input
#   stays numeric. An empty input is returned unchanged.
roundValues = function(values){
  # Guard against empty input: the previous `1:length(values)` loop iterated
  # over c(1, 0) in that case and returned a spurious NA element.
  if (length(values) == 0){
    return(values)
  }
  # ".*e" is greedy, so everything up to the last "e" is replaced by "1e";
  # gsub() is vectorised, so no explicit loop is needed.
  simplified = as.numeric(gsub(".*e", "1e", as.character(values)))
  # Assign through `[]` so the container keeps its original type and attributes.
  values[] = simplified
  return(values)
}
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
241
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
# createDotPlot draws one point per GO term (x = Significant/Annotated ratio,
# y = term, colour = simplified p-value, size = Significant count) and saves
# the figure as "dotplot.png" in the working directory.
#
# Args:
#   data: table with at least the columns pvalues, Significant, Annotated, Term.
#   onto: ontology label appended to the y axis title.
createDotPlot = function(data, onto){

  # p-values are cleaned by deleteInfChar, simplified by roundValues and
  # finally forced to numeric so they can drive the colour gradient.
  values = as.numeric(roundValues(deleteInfChar(data$pvalues)))

  # These local names are referenced inside aes() and therefore also end up
  # as default legend titles; keep them as they are.
  count = data$Significant
  geneRatio = count/data$Annotated
  goTerms = data$Term

  labely = paste("GO terms", onto, sep=" ")

  dot_plot = ggplot(data, aes(x=geneRatio, y=goTerms, color=values, size=count)) +
    geom_point() +
    scale_colour_gradientn(colours=c("red","violet","blue")) +
    xlab("Gene Ratio") +
    ylab(labely) +
    labs(color="p-values\n")

  # Hand the plot over explicitly; it is the same object last_plot() would return.
  ggsave("dotplot.png", plot = dot_plot, device = "png", dpi = 320, limitsize = TRUE,
         width = 15, height = 15, units="cm")
}
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
256
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
# createBarPlot draws one horizontal bar per GO term (bar length = Significant
# count, fill colour = simplified p-value) and saves the figure as
# "barplot.png" in the working directory.
#
# Args:
#   data: table with at least the columns pvalues, Significant, Term.
#   onto: ontology label appended to the GO term axis title.
createBarPlot = function(data, onto){

  # p-values are cleaned by deleteInfChar, simplified by roundValues and
  # finally forced to numeric so they can drive the fill gradient.
  values = deleteInfChar(data$pvalues)
  values = roundValues(values)
  values = as.numeric(values)

  # These local names are referenced inside aes(); keep them as they are.
  goTerms = data$Term
  count = data$Significant

  labely = paste("GO terms", onto, sep=" ")

  # The mapping previously carried an extra unnamed `scale(scale = 0.5)`
  # argument. It names no aesthetic and base::scale() cannot be evaluated
  # without its `x` argument, so it has been dropped from aes().
  bar_plot = ggplot(data, aes(x=goTerms, y=count, fill=values)) +
    ylab("Gene count") +
    xlab(labely) +
    geom_bar(stat="identity") +
    scale_fill_gradientn(colours=c("red","violet","blue")) +
    coord_flip() +
    labs(fill="p-values\n")

  ggsave("barplot.png", plot = bar_plot, device = "png", dpi = 320, limitsize = TRUE,
         width = 15, height = 15, units="cm")
}
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
271
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
272
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
# Produce the different outputs
#
# createOutputs writes the requested outputs into the working directory:
# 'result.csv' (tab-separated), "barplot.png" and "dotplot.png".
#
# Args:
#   result:     enrichment result; when NULL an explanatory message is written
#               to 'result.csv' and blank placeholder images are produced.
#   cut_result: thresholded result table; when NULL a "threshold too stringent"
#               message is written to 'result.csv' and drawn on the placeholders.
#   text, barplot, dotplot: character flags; each output is produced only when
#               its flag equals the string "TRUE".
#   onto:       ontology label forwarded to createBarPlot / createDotPlot.
# Returns:
#   TRUE in every case.
createOutputs = function(result, cut_result, text, barplot, dotplot, onto){

  # Write a placeholder PNG: an empty page, with `msg` drawn at (0, 0) when given.
  placeholderPng = function(path, msg = NULL){
    png(filename=path)
    plot.new()
    if (!is.null(msg)){
      # Qualified call: the enclosing function has a parameter named `text`.
      graphics::text(0, 0, msg)
    }
    dev.off()
  }

  # Write a one-column table holding only the message.
  writeMessage = function(msg){
    write.table(msg, file='result.csv', quote=FALSE, sep='\t', col.names = TRUE, row.names = FALSE)
  }

  # Case 1: the enrichment itself produced nothing.
  if (is.null(result)){
    err_msg = "None of the input ids can be found in the org package data, enrichment analysis cannot be realized. \n The inputs ids probably either have no associated GO terms or are not ENSG identifiers (e.g : ENSG00000012048)."
    if (text=="TRUE"){
      writeMessage(err_msg)
    }
    # The placeholders are intentionally left blank in this case.
    if (barplot=="TRUE"){
      placeholderPng("barplot.png")
    }
    if (dotplot=="TRUE"){
      placeholderPng("dotplot.png")
    }
    return(TRUE)
  }

  # Case 2: nothing passed the threshold.
  if (is.null(cut_result)){
    # Defined before the flag checks: the message is needed by the plot
    # placeholders even when the text output is switched off (it used to be
    # assigned only inside the text branch, causing "object 'err_msg' not found").
    err_msg = "Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value."
    if (text=="TRUE"){
      writeMessage(err_msg)
    }
    if (barplot=="TRUE"){
      placeholderPng("barplot.png", err_msg)
    }
    if (dotplot=="TRUE"){
      placeholderPng("dotplot.png", err_msg)
    }
    return(TRUE)
  }

  # Case 3: regular outputs.
  if (text=="TRUE"){
    write.table(cut_result, file='result.csv', quote=FALSE, sep='\t', col.names = TRUE, row.names = FALSE)
  }

  if (barplot=="TRUE"){
    createBarPlot(cut_result, onto)
  }

  if (dotplot=="TRUE"){
    createDotPlot(cut_result, onto)
  }
  return(TRUE)
}
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
353
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
354
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
355
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
356 # Load R library ggplot2 to plot graphs
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
357 library(ggplot2)
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
358
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
# Launch enrichment analysis
# goEnrichment's return value is unpacked positionally: its first element is
# the test result, its second element the GO data object.
allresult = goEnrichment(geneuniverse, sample, onto)
result = allresult[1][[1]]
myGOdata = allresult[2][[1]]

# Adjust the result with a multiple testing correction (or none) and apply the
# user p-value cutoff. Without an enrichment result there is nothing to
# adjust, so cut_result keeps its NULL default.
cut_result = NULL
if (!is.null(result)){
  cut_result = corrMultipleTesting(result, myGOdata, correction, threshold)
}

createOutputs(result, cut_result, text, barplot, dotplot, onto)
92dfcfb03add planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
376