comparison GO_prof_comp.R @ 0:a0ea9e00bd66 draft

planemo upload commit e5e4a3efd1cb1498724c0e93f8ec9606d0ed3045-dirty
author proteore
date Thu, 28 Feb 2019 05:53:47 -0500
parents
children 67a4f68f1c1c
comparison
equal deleted inserted replaced
-1:000000000000 0:a0ea9e00bd66
1 options(warn=-1) #TURN OFF WARNINGS !!!!!!
2 suppressMessages(library(clusterProfiler,quietly = TRUE))
3
4 #return the number of character from the longest description found (from the 10 first)
5 max_str_length_10_first <- function(vector){
6 vector <- as.vector(vector)
7 nb_description = length(vector)
8 if (nb_description >= 10){nb_description=10}
9 return(max(nchar(vector[1:nb_description])))
10 }
11
12 str2bool <- function(x){
13 if (any(is.element(c("t","true"),tolower(x)))){
14 return (TRUE)
15 }else if (any(is.element(c("f","false"),tolower(x)))){
16 return (FALSE)
17 }else{
18 return(NULL)
19 }
20 }
21
22 get_args <- function(){
23
24 ## Collect arguments
25 args <- commandArgs(TRUE)
26
27 ## Default setting when no arguments passed
28 if(length(args) < 1) {
29 args <- c("--help")
30 }
31
32 ## Help section
33 if("--help" %in% args) {
34 cat("Selection and Annotation HPA
35 Arguments:
36 --inputtype1: type of input (list of id or filename)
37 --inputtype2: type of input (list of id or filename)
38 --input1: input1
39 --input2: input2
40 --column1: the column number which you would like to apply...
41 --column2: the column number which you would like to apply...
42 --header1: true/false if your file contains a header
43 --header2: true/false if your file contains a header
44 --ont: ontology to use
45 --lev: ontology level
46 --org: organism db package
47 --list_name1: name of the first list
48 --list_name2: name of the second list \n")
49
50 q(save="no")
51 }
52
53 parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
54 argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
55 args <- as.list(as.character(argsDF$V2))
56 names(args) <- argsDF$V1
57
58 return(args)
59 }
60
61 get_ids=function(inputtype, input, ncol, header) {
62
63 if (inputtype == "text") {
64 ids = strsplit(input, "[ \t\n]+")[[1]]
65 } else if (inputtype == "file") {
66 header=str2bool(header)
67 ncol=get_cols(ncol)
68 csv = read.csv(input,header=header, sep="\t", as.is=T)
69 ids=csv[,ncol]
70 }
71
72 ids = unlist(strsplit(ids,";"))
73 ids = ids[which(!is.na(ids))]
74
75 return(ids)
76 }
77
78 str2bool <- function(x){
79 if (any(is.element(c("t","true"),tolower(x)))){
80 return (TRUE)
81 }else if (any(is.element(c("f","false"),tolower(x)))){
82 return (FALSE)
83 }else{
84 return(NULL)
85 }
86 }
87
88 check_ids <- function(vector,type) {
89 uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$"
90 entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$"
91 if (type == "entrez")
92 return(grepl(entrez_id,vector))
93 else if (type == "uniprot") {
94 return(grepl(uniprot_pattern,vector))
95 }
96 }
97
98 make_dotplot<-function(res.cmp,ontology) {
99
100 res.cmp@compareClusterResult$Description <- sapply(as.vector(res.cmp@compareClusterResult$Description), function(x) {ifelse(nchar(x)>50, substr(x,1,50),x)},USE.NAMES = FALSE)
101 output_path= paste("GO_profiles_comp_",ontology,".png",sep="")
102 png(output_path,height = 720, width = 600)
103 p <- dotplot(res.cmp, showCategory=30)
104 print(p)
105 dev.off()
106 }
107
108 get_cols <-function(input_cols) {
109 input_cols <- gsub("c","",gsub("C","",gsub(" ","",input_cols)))
110 if (grepl(":",input_cols)) {
111 first_col=unlist(strsplit(input_cols,":"))[1]
112 last_col=unlist(strsplit(input_cols,":"))[2]
113 cols=first_col:last_col
114 } else {
115 cols = as.integer(unlist(strsplit(input_cols,",")))
116 }
117 return(cols)
118 }
119
120 #to check
121 cmp.GO <- function(l,fun="groupGO",orgdb, ontology, level=3, readable=TRUE) {
122 cmpGO<-compareCluster(geneClusters = l,
123 fun=fun,
124 OrgDb = orgdb,
125 ont=ontology,
126 level=level,
127 readable=TRUE)
128
129 return(cmpGO)
130 }
131
132 check_ids <- function(vector,type) {
133 uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$"
134 entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$"
135 if (type == "entrez")
136 return(grepl(entrez_id,vector))
137 else if (type == "uniprot") {
138 return(grepl(uniprot_pattern,vector))
139 }
140 }
141
142 main = function() {
143
144 #to get the args of the command line
145 args=get_args()
146
147 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/GO_terms_profiles_comparison/args.rda")
148 #load("/home/dchristiany/proteore_project/ProteoRE/tools/GO_terms_profiles_comparison/args.rda")
149
150 ids1<-get_ids(args$inputtype1, args$input1, args$column1, args$header1)
151 ids2<-get_ids(args$inputtype2, args$input2, args$column2, args$header2)
152 ont = strsplit(args$ont, ",")[[1]]
153 lev=as.integer(args$lev)
154 org=args$org
155
156 #load annot package
157 suppressMessages(library(args$org, character.only = TRUE, quietly = TRUE))
158
159 # Extract OrgDb
160 if (args$org=="org.Hs.eg.db") {
161 orgdb<-org.Hs.eg.db
162 } else if (args$org=="org.Mm.eg.db") {
163 orgdb<-org.Mm.eg.db
164 } else if (args$org=="org.Rn.eg.db") {
165 orgdb<-org.Rn.eg.db
166 }
167
168 for(ontology in ont) {
169 liste = list("l1"=ids1,"l2"=ids2)
170 names(liste) = c(args$list_name1,args$list_name2)
171 res.cmp<-cmp.GO(l=liste,fun="groupGO",orgdb, ontology, level=lev, readable=TRUE)
172 make_dotplot(res.cmp,ontology)
173 output_path = paste("GO_profiles_comp_",ontology,".tsv",sep="")
174 write.table(res.cmp@compareClusterResult, output_path, sep="\t", row.names=F, quote=F)
175 }
176
177 } #end main
178
179 main()
180