annotate GO-enrich.R @ 0:076349b72690 draft

planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
author proteore
date Wed, 22 Aug 2018 10:44:46 -0400
parents
children 91b9b48d07b3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
076349b72690 planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
1 suppressMessages(library(clusterProfiler))
076349b72690 planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
2
076349b72690 planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
3 #library(org.Sc.sgd.db)
076349b72690 planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
4 suppressMessages(library(org.Hs.eg.db))
076349b72690 planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
5 suppressMessages(library(org.Mm.eg.db))
076349b72690 planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
6
076349b72690 planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
# Read a tab-separated file and return its content as a data.frame.
#
# Arguments:
#   filename: path of the tab-separated file to read.
#   header:   the string "true" when the first line of the file holds the
#             column names; any other value means the file has no header.
#
# Returns the data rows with every completely empty row (all cells NA or "")
# removed. With a header the columns are named after the first line,
# otherwise they keep read.table's default names (V1, V2, ...).
readfile <- function(filename, header) {
  # One place for the read options so header and data are parsed alike.
  read_tsv <- function(...) {
    read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE,
               fill = TRUE, na.strings = c("", "NA"), blank.lines.skip = TRUE,
               quote = "", ...)
  }
  drop_empty_rows <- function(df) {
    df[!apply(is.na(df) | df == "", 1, all), , drop = FALSE]
  }

  if (header == "true") {
    # Read the header line as plain text: letting read.table guess the column
    # types would rewrite names such as "T" (-> "TRUE") or "01" (-> "1").
    headers <- read_tsv(nrows = 1, colClasses = "character")
    header_names <- vapply(headers, function(col) as.character(col[1]),
                           character(1), USE.NAMES = FALSE)
    # An empty header cell is read as NA; keep naming such a column "NA".
    header_names[is.na(header_names)] <- "NA"
    # Read the data of the file (skipping the header line).
    file <- drop_empty_rows(read_tsv(skip = 1))
    names(file) <- header_names
  } else {
    file <- drop_empty_rows(read_tsv())
  }
  return(file)
}
076349b72690 planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
26
076349b72690 planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
# GO classification of a gene list (groupGO) for one ontology, plus a bar
# plot of the result.
#
# Arguments:
#   geneid:   gene identifiers, passed to groupGO as `gene`.
#   orgdb:    annotation database object, passed to groupGO as `OrgDb`.
#   ontology: ontology name, passed to groupGO as `ont`; also used to build
#             the name of the plot file.
#   level:    GO level passed to groupGO (default 3).
#   readable: passed to groupGO's `readable` argument (default TRUE).
#
# Side effect: writes the bar plot to "GGO.<ontology>.png" in the working
# directory.
# Returns the object produced by groupGO.
repartition.GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) {
  ggo <- groupGO(gene = geneid,
                 OrgDb = orgdb,
                 ont = ontology,
                 level = level,
                 readable = readable)  # honour the caller's choice
  png(paste0("GGO.", ontology, ".png"))
  # Close the device even when plotting fails, so no device is left open.
  on.exit(dev.off(), add = TRUE)
  print(barplot(ggo, showCategory = 10))
  ggo
}
076349b72690 planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
40
076349b72690 planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
# GO over-representation test (enrichGO) for one ontology, plus bar and dot
# plots of the result.
#
# Arguments:
#   geneid:      gene identifiers, passed to enrichGO as `gene`.
#   universe:    background gene identifiers, passed to enrichGO as `universe`.
#   orgdb:       annotation database object, passed to enrichGO as `OrgDb`.
#   ontology:    ontology name, passed to enrichGO as `ont`; also used to
#                build the names of the plot files.
#   pval_cutoff: passed to enrichGO as `pvalueCutoff`.
#   qval_cutoff: passed to enrichGO as `qvalueCutoff`.
#
# Side effects: writes "EGO.<ontology>.bar.png" and "EGO.<ontology>.dot.png"
# in the working directory.
# Returns the object produced by enrichGO.
enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff) {
  # The key type is deliberately not passed: the argument was spelled
  # `keytype` in older clusterProfiler releases and `keyType` in newer ones,
  # and its default is "ENTREZID" (the value this script needs) in both.
  ego <- enrichGO(gene = geneid,
                  universe = universe,
                  OrgDb = orgdb,
                  ont = ontology,
                  pAdjustMethod = "BH",
                  pvalueCutoff = pval_cutoff,
                  qvalueCutoff = qval_cutoff,
                  readable = TRUE)

  # Draw one plot into "EGO.<ontology>.<suffix>.png"; the device is closed
  # even when building or printing the plot fails.
  save_plot <- function(suffix, make_plot) {
    png(paste0("EGO.", ontology, ".", suffix, ".png"))
    on.exit(dev.off(), add = TRUE)
    print(make_plot())
  }
  save_plot("bar", function() barplot(ego))
  save_plot("dot", function() dotplot(ego, showCategory = 10))
  ego
}
076349b72690 planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
65
076349b72690 planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
# Test which elements of a character vector look like identifiers of a
# given kind.
#
# Arguments:
#   vector: character vector of identifiers to test.
#   type:   "entrez" or "uniprot".
#
# Returns a logical vector, one element per identifier, TRUE where the whole
# identifier matches the pattern for `type`. Any other `type` yields NULL
# (invisibly), as before.
check_ids <- function(vector, type) {
  uniprot_pattern <- "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$"
  # Digits only, or a 1-2 letter prefix, an underscore, up to four optional
  # letters and digits (e.g. "NM_000546"). The previous pattern opened with
  # a stray quote instead of "(", which left the closing ")" unmatched and
  # made the regular expression invalid.
  entrez_pattern <- "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$"
  if (type == "entrez") {
    return(grepl(entrez_pattern, vector))
  } else if (type == "uniprot") {
    return(grepl(uniprot_pattern, vector))
  }
  invisible(NULL)
}
076349b72690 planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
75
076349b72690 planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
# Turn "--name=value" command-line tokens into a named list of strings.
# Only the first "=" separates the name from the value, so a value may
# itself contain "="; tokens without "=" are ignored.
parse_cli_args <- function(tokens) {
  tokens <- sub("^--", "", tokens)
  eq <- regexpr("=", tokens, fixed = TRUE)
  keep <- eq > 0
  tokens <- tokens[keep]
  eq <- eq[keep]
  values <- as.list(substring(tokens, eq + 1))
  names(values) <- substring(tokens, 1, eq - 1)
  values
}

# Convert a column specification ("c3" or "3") into the column number.
parse_column_index <- function(column) {
  if (is.null(column) || !grepl("^c?[0-9]+$", column)) {
    stop("Please enter the right format for column number: c[number]")
  }
  as.numeric(sub("^c", "", column))
}

# Flatten a column whose cells may hold several ";"-separated IDs into one
# character vector, dropping missing and empty entries.
split_ids <- function(column) {
  ids <- unlist(strsplit(as.character(column), ";", fixed = TRUE),
                use.names = FALSE)
  ids <- trimws(ids)
  ids[!is.na(ids) & nzchar(ids)]
}

# Split a free-text list of IDs on blanks, tabs and newlines.
split_text_ids <- function(text) {
  ids <- strsplit(text, "[ \t\n]+")[[1]]
  ids[nzchar(ids)]
}

# Return the unique Entrez gene IDs for `ids`, or NULL when `ids` holds no
# identifier of the declared `id_type` ("Uniprot" or "Entrez").
# UniProt accessions are converted with bitr (from clusterProfiler).
to_entrez <- function(ids, id_type, orgdb) {
  if (identical(id_type, "Uniprot") && any(check_ids(ids, "uniprot"))) {
    mapped <- bitr(ids, fromType = "UNIPROT", toType = "ENTREZID", OrgDb = orgdb)
    unique(mapped$ENTREZID)
  } else if (identical(id_type, "Entrez") && any(check_ids(ids, "entrez"))) {
    unique(ids)
  } else {
    NULL
  }
}

# Command-line entry point: read the "--name=value" arguments, build the
# input (and optional background) gene lists, then run, for every requested
# ontology, the GO classification (repartition.GO) and/or the GO
# over-representation test (enrich.GO). Result tables are appended to the
# file given by --text_output; the plots are written by those two functions.
# With no argument or with --help, prints the usage and quits R.
clusterProfiler <- function() {
  args <- commandArgs(TRUE)
  if (length(args) < 1) {
    args <- c("--help")
  }

  # Help section
  if ("--help" %in% args) {
    cat("clusterProfiler Enrichment Analysis
Arguments:
  --input_type: type of input (text: list of IDs, file: filename)
  --input: input
  --ncol: the column number which contains list of input IDs
  --header: true/false if your file contains a header
  --id_type: the type of input IDs (Uniprot/Entrez)
  --universe_type: text or file
  --universe: background IDs list
  --uncol: the column number which contains background IDs list
  --uheader: true/false if the background IDs file contains header
  --universe_id_type: the type of universe IDs (Uniprot/Entrez)
  --species: human, mouse or rat
  --onto_opt: ontology options (comma-separated)
  --go_represent: true/false, run the GO classification (groupGO)
  --level: 1-3
  --go_enrich: true/false, run the GO over-representation test (enrichGO)
  --pval_cutoff
  --qval_cutoff
  --text_output: text output filename \n")
    q(save = "no")
  }

  # Parse arguments. (A leftover debugging load() of a developer-local
  # args.Rda used to overwrite them at this point; it has been removed.)
  args <- parse_cli_args(args)

  # Extract OrgDb
  species <- args$species
  if (is.null(species)) {
    stop("Missing --species argument (human, mouse or rat)")
  }
  orgdb <- switch(species,
    human = org.Hs.eg.db,
    mouse = org.Mm.eg.db,
    rat = {
      # org.Rn.eg.db is not attached at the top of this script, so it is
      # looked up explicitly instead of failing with "object not found".
      if (!requireNamespace("org.Rn.eg.db", quietly = TRUE)) {
        stop("Package org.Rn.eg.db is required for species 'rat'")
      }
      org.Rn.eg.db::org.Rn.eg.db
    },
    stop(paste("Unknown species:", species))
  )

  # Extract input IDs
  input_type <- args$input_type
  if (identical(input_type, "text")) {
    input <- split_text_ids(args$input)
  } else if (identical(input_type, "file")) {
    ncol <- parse_column_index(args$ncol)
    file <- readfile(args$input, args$header)
    # A cell may list several IDs separated by ";"
    input <- split_ids(file[, ncol])
  } else {
    stop("Unknown input_type: expected 'text' or 'file'")
  }

  # Get the input gene list (Entrez gene IDs) from the input IDs
  id_type <- args$id_type
  gene <- to_entrez(input, id_type, orgdb)
  if (is.null(gene)) {
    stop(paste(id_type, "not found in your ids list, please check your IDs in input or the selected column of your input file"))
  }

  ontology <- strsplit(args$onto_opt, ",")[[1]]

  # Extract GGO/EGO arguments. identical() keeps a missing flag from making
  # if() fail on a zero-length condition.
  run_represent <- identical(args$go_represent, "true")
  run_enrich <- identical(args$go_enrich, "true")
  if (run_represent) {
    level <- as.numeric(args$level)
  }

  # Defined on every path so the test in the loop below never meets an
  # undefined variable; NULL means "no usable background", in which case the
  # over-representation test is skipped.
  universe_gene <- NULL
  if (run_enrich) {
    pval_cutoff <- as.numeric(args$pval_cutoff)
    qval_cutoff <- as.numeric(args$qval_cutoff)
    # Extract universe background genes (same options as the input)
    universe_type <- args$universe_type
    if (!is.null(universe_type)) {
      if (universe_type == "text") {
        universe <- split_text_ids(args$universe)
      } else if (universe_type == "file") {
        universe_ncol <- parse_column_index(args$uncol)
        universe_file <- readfile(args$universe, args$uheader)
        universe <- split_ids(universe_file[, universe_ncol])
      } else {
        stop("Unknown universe_type: expected 'text' or 'file'")
      }
      universe_id_type <- args$universe_id_type
      universe_gene <- to_entrez(universe, universe_id_type, orgdb)
      if (is.null(universe_gene)) {
        if (universe_type == "text") {
          print(paste(universe_id_type, "not found in your background IDs list", sep = " "))
        } else {
          print(paste(universe_id_type, "not found in the column", universe_ncol, "of your background IDs file", sep = " "))
        }
      }
    }
  }

  # Run the requested analyses for each ontology
  for (onto in ontology) {
    if (run_represent) {
      ggo <- repartition.GO(gene, orgdb, onto, level, readable = TRUE)
      write.table(ggo, args$text_output, append = TRUE, sep = "\t", row.names = FALSE, quote = FALSE)
    }
    if (run_enrich && !is.null(universe_gene)) {
      ego <- enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff)
      write.table(ego, args$text_output, append = TRUE, sep = "\t", row.names = FALSE, quote = FALSE)
    }
  }
  invisible(NULL)
}
076349b72690 planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
225
076349b72690 planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
proteore
parents:
diff changeset
# Script entry point: run the analysis with the command-line arguments.
226 clusterProfiler()