comparison GO_prof_comp.R @ 3:e337dcfb84e4 draft

planemo upload commit 51fc514a85c1055cab5bb6e76c90f3da7e648101-dirty
author proteore
date Thu, 07 Mar 2019 09:20:58 -0500
parents 67a4f68f1c1c
children
comparison
equal deleted inserted replaced
2:67a4f68f1c1c 3:e337dcfb84e4
1 options(warn=-1) #TURN OFF WARNINGS !!!!!! 1 options(warn=-1) #TURN OFF WARNINGS !!!!!!
2 suppressMessages(library(clusterProfiler,quietly = TRUE)) 2 suppressMessages(library(clusterProfiler,quietly = TRUE))
3 suppressMessages(library(plyr, quietly = TRUE))
4 suppressMessages(library(ggplot2, quietly = TRUE))
5 suppressMessages(library(DOSE, quietly = TRUE))
3 6
4 #return the number of character from the longest description found (from the 10 first) 7 #return the number of character from the longest description found (from the 10 first)
5 max_str_length_10_first <- function(vector){ 8 max_str_length_10_first <- function(vector){
6 vector <- as.vector(vector) 9 vector <- as.vector(vector)
7 nb_description = length(vector) 10 nb_description = length(vector)
93 else if (type == "uniprot") { 96 else if (type == "uniprot") {
94 return(grepl(uniprot_pattern,vector)) 97 return(grepl(uniprot_pattern,vector))
95 } 98 }
96 } 99 }
97 100
101 #res.cmp@compareClusterResult$Description <- sapply(as.vector(res.cmp@compareClusterResult$Description), function(x) {ifelse(nchar(x)>50, substr(x,1,50),x)},USE.NAMES = FALSE)
102 fortify.compareClusterResult <- function(res.cmp, showCategory=30, by="geneRatio", split=NULL, includeAll=TRUE) {
103 clProf.df <- as.data.frame(res.cmp)
104 .split <- split
105 ## get top 5 (default) categories of each gene cluster.
106 if (is.null(showCategory)) {
107 result <- clProf.df
108 } else {
109 Cluster <- NULL # to satisfy codetools
110 topN <- function(res, showCategory) {
111 ddply(.data = res, .variables = .(Cluster), .fun = function(df, N) {
112 if (length(df$Count) > N) {
113 if (any(colnames(df) == "pvalue")) {
114 idx <- order(df$pvalue, decreasing=FALSE)[1:N]
115 } else {
116 ## for groupGO
117 idx <- order(df$Count, decreasing=T)[1:N]
118 }
119 return(df[idx,])
120 } else {
121 return(df)
122 }
123 },
124 N=showCategory
125 )
126 }
127 if (!is.null(.split) && .split %in% colnames(clProf.df)) {
128 lres <- split(clProf.df, as.character(clProf.df[, .split]))
129 lres <- lapply(lres, topN, showCategory = showCategory)
130 result <- do.call('rbind', lres)
131 } else {
132 result <- topN(clProf.df, showCategory)
133 }
134 }
135 ID <- NULL
136 if (includeAll == TRUE) {
137 result = subset(clProf.df, ID %in% result$ID)
138 }
139 ## remove zero count
140 result$Description <- as.character(result$Description) ## un-factor
141 GOlevel <- result[,c("ID", "Description")] ## GO ID and Term
142 GOlevel <- unique(GOlevel)
143 result <- result[result$Count != 0, ]
144 result$Description <- factor(result$Description,levels=rev(GOlevel[,2]))
145 if (by=="rowPercentage") {
146 Description <- Count <- NULL # to satisfy codetools
147 result <- ddply(result,.(Description),transform,Percentage = Count/sum(Count),Total = sum(Count))
148 ## label GO Description with gene counts.
149 x <- mdply(result[, c("Description", "Total")], paste, sep=" (")
150 y <- sapply(x[,3], paste, ")", sep="")
151 result$Description <- y
152
153 ## restore the original order of GO Description
154 xx <- result[,c(2,3)]
155 xx <- unique(xx)
156 rownames(xx) <- xx[,1]
157 Termlevel <- xx[as.character(GOlevel[,1]),2]
158
159 ##drop the *Total* column
160 result <- result[, colnames(result) != "Total"]
161 result$Description <- factor(result$Description, levels=rev(Termlevel))
162
163 } else if (by == "count") {
164 ## nothing
165 } else if (by == "geneRatio") { ##default
166 gsize <- as.numeric(sub("/\\d+$", "", as.character(result$GeneRatio)))
167 gcsize <- as.numeric(sub("^\\d+/", "", as.character(result$GeneRatio)))
168 result$GeneRatio = gsize/gcsize
169 cluster <- paste(as.character(result$Cluster),"\n", "(", gcsize, ")", sep="")
170 lv <- unique(cluster)[order(as.numeric(unique(result$Cluster)))]
171 result$Cluster <- factor(cluster, levels = lv)
172 } else {
173 ## nothing
174 }
175 return(result)
176 }
177
178 ##function plotting.clusteProfile from clusterProfiler pkg
179 plotting.clusterProfile <- function(clProf.reshape.df,x = ~Cluster,type = "dot", colorBy = "p.adjust",by = "geneRatio",title="",font.size=12) {
180
181 Description <- Percentage <- Count <- Cluster <- GeneRatio <- p.adjust <- pvalue <- NULL # to
182 if (type == "dot") {
183 if (by == "rowPercentage") {
184 p <- ggplot(clProf.reshape.df,
185 aes_(x = x, y = ~Description, size = ~Percentage))
186 } else if (by == "count") {
187 p <- ggplot(clProf.reshape.df,
188 aes_(x = x, y = ~Description, size = ~Count))
189 } else if (by == "geneRatio") { ##DEFAULT
190 p <- ggplot(clProf.reshape.df,
191 aes_(x = x, y = ~Description, size = ~GeneRatio))
192 } else {
193 ## nothing here
194 }
195 if (any(colnames(clProf.reshape.df) == colorBy)) {
196 p <- p +
197 geom_point() +
198 aes_string(color=colorBy) +
199 scale_color_continuous(low="red", high="blue", guide=guide_colorbar(reverse=TRUE))
200 ## scale_color_gradientn(guide=guide_colorbar(reverse=TRUE), colors = enrichplot:::sig_palette)
201 } else {
202 p <- p + geom_point(colour="steelblue")
203 }
204 }
205
206 p <- p + xlab("") + ylab("") + ggtitle(title) +
207 theme_dose(font.size)
208
209 ## theme(axis.text.x = element_text(colour="black", size=font.size, vjust = 1)) +
210 ## theme(axis.text.y = element_text(colour="black",
211 ## size=font.size, hjust = 1)) +
212 ## ggtitle(title)+theme_bw()
213 ## p <- p + theme(axis.text.x = element_text(angle=angle.axis.x,
214 ## hjust=hjust.axis.x,
215 ## vjust=vjust.axis.x))
216
217 return(p)
218 }
219
98 make_dotplot<-function(res.cmp,ontology) { 220 make_dotplot<-function(res.cmp,ontology) {
99 221
100 res.cmp@compareClusterResult$Description <- sapply(as.vector(res.cmp@compareClusterResult$Description), function(x) {ifelse(nchar(x)>50, substr(x,1,50),x)},USE.NAMES = FALSE) 222 dfok<-fortify.compareClusterResult(res.cmp)
223 dfok$Description <- sapply(as.vector(dfok$Description), function(x) {ifelse(nchar(x)>50, substr(x,1,50),x)},USE.NAMES = FALSE)
224 p<-plotting.clusterProfile(dfok, title="")
225
226 #plot(p, type="dot") #
101 output_path= paste("GO_profiles_comp_",ontology,".png",sep="") 227 output_path= paste("GO_profiles_comp_",ontology,".png",sep="")
102 png(output_path,height = 720, width = 600) 228 png(output_path,height = 720, width = 600)
103 p <- dotplot(res.cmp, showCategory=30) 229 pl <- plot(p, type="dot")
104 print(p) 230 print(pl)
105 dev.off() 231 dev.off()
106 } 232 }
107 233
108 get_cols <-function(input_cols) { 234 get_cols <-function(input_cols) {
109 input_cols <- gsub("c","",gsub("C","",gsub(" ","",input_cols))) 235 input_cols <- gsub("c","",gsub("C","",gsub(" ","",input_cols)))
110 if (grepl(":",input_cols)) { 236 if (grepl(":",input_cols)) {
142 main = function() { 268 main = function() {
143 269
144 #to get the args of the command line 270 #to get the args of the command line
145 args=get_args() 271 args=get_args()
146 272
147 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/GO_terms_profiles_comparison/args.rda") 273
148 #load("/home/dchristiany/proteore_project/ProteoRE/tools/GO_terms_profiles_comparison/args.rda")
149
150 ids1<-get_ids(args$inputtype1, args$input1, args$column1, args$header1) 274 ids1<-get_ids(args$inputtype1, args$input1, args$column1, args$header1)
151 ids2<-get_ids(args$inputtype2, args$input2, args$column2, args$header2) 275 ids2<-get_ids(args$inputtype2, args$input2, args$column2, args$header2)
152 ont = strsplit(args$ont, ",")[[1]] 276 ont = strsplit(args$ont, ",")[[1]]
153 lev=as.integer(args$lev) 277 lev=as.integer(args$lev)
154 org=args$org 278 org=args$org