Mercurial > repos > proteore > proteore_go_terms_profiles_comparison
comparison GO_prof_comp.R @ 3:e337dcfb84e4 draft
planemo upload commit 51fc514a85c1055cab5bb6e76c90f3da7e648101-dirty
| author | proteore |
|---|---|
| date | Thu, 07 Mar 2019 09:20:58 -0500 |
| parents | 67a4f68f1c1c |
| children |
comparison
equal
deleted
inserted
replaced
| 2:67a4f68f1c1c | 3:e337dcfb84e4 |
|---|---|
| 1 options(warn=-1) #TURN OFF WARNINGS !!!!!! | 1 options(warn=-1) #TURN OFF WARNINGS !!!!!! |
| 2 suppressMessages(library(clusterProfiler,quietly = TRUE)) | 2 suppressMessages(library(clusterProfiler,quietly = TRUE)) |
| 3 suppressMessages(library(plyr, quietly = TRUE)) | |
| 4 suppressMessages(library(ggplot2, quietly = TRUE)) | |
| 5 suppressMessages(library(DOSE, quietly = TRUE)) | |
| 3 | 6 |
| 4 #return the number of character from the longest description found (from the 10 first) | 7 #return the number of character from the longest description found (from the 10 first) |
| 5 max_str_length_10_first <- function(vector){ | 8 max_str_length_10_first <- function(vector){ |
| 6 vector <- as.vector(vector) | 9 vector <- as.vector(vector) |
| 7 nb_description = length(vector) | 10 nb_description = length(vector) |
| 93 else if (type == "uniprot") { | 96 else if (type == "uniprot") { |
| 94 return(grepl(uniprot_pattern,vector)) | 97 return(grepl(uniprot_pattern,vector)) |
| 95 } | 98 } |
| 96 } | 99 } |
| 97 | 100 |
| 101 #res.cmp@compareClusterResult$Description <- sapply(as.vector(res.cmp@compareClusterResult$Description), function(x) {ifelse(nchar(x)>50, substr(x,1,50),x)},USE.NAMES = FALSE) | |
| 102 fortify.compareClusterResult <- function(res.cmp, showCategory=30, by="geneRatio", split=NULL, includeAll=TRUE) { | |
| 103 clProf.df <- as.data.frame(res.cmp) | |
| 104 .split <- split | |
| 105 ## get top 5 (default) categories of each gene cluster. | |
| 106 if (is.null(showCategory)) { | |
| 107 result <- clProf.df | |
| 108 } else { | |
| 109 Cluster <- NULL # to satisfy codetools | |
| 110 topN <- function(res, showCategory) { | |
| 111 ddply(.data = res, .variables = .(Cluster), .fun = function(df, N) { | |
| 112 if (length(df$Count) > N) { | |
| 113 if (any(colnames(df) == "pvalue")) { | |
| 114 idx <- order(df$pvalue, decreasing=FALSE)[1:N] | |
| 115 } else { | |
| 116 ## for groupGO | |
| 117 idx <- order(df$Count, decreasing=T)[1:N] | |
| 118 } | |
| 119 return(df[idx,]) | |
| 120 } else { | |
| 121 return(df) | |
| 122 } | |
| 123 }, | |
| 124 N=showCategory | |
| 125 ) | |
| 126 } | |
| 127 if (!is.null(.split) && .split %in% colnames(clProf.df)) { | |
| 128 lres <- split(clProf.df, as.character(clProf.df[, .split])) | |
| 129 lres <- lapply(lres, topN, showCategory = showCategory) | |
| 130 result <- do.call('rbind', lres) | |
| 131 } else { | |
| 132 result <- topN(clProf.df, showCategory) | |
| 133 } | |
| 134 } | |
| 135 ID <- NULL | |
| 136 if (includeAll == TRUE) { | |
| 137 result = subset(clProf.df, ID %in% result$ID) | |
| 138 } | |
| 139 ## remove zero count | |
| 140 result$Description <- as.character(result$Description) ## un-factor | |
| 141 GOlevel <- result[,c("ID", "Description")] ## GO ID and Term | |
| 142 GOlevel <- unique(GOlevel) | |
| 143 result <- result[result$Count != 0, ] | |
| 144 result$Description <- factor(result$Description,levels=rev(GOlevel[,2])) | |
| 145 if (by=="rowPercentage") { | |
| 146 Description <- Count <- NULL # to satisfy codetools | |
| 147 result <- ddply(result,.(Description),transform,Percentage = Count/sum(Count),Total = sum(Count)) | |
| 148 ## label GO Description with gene counts. | |
| 149 x <- mdply(result[, c("Description", "Total")], paste, sep=" (") | |
| 150 y <- sapply(x[,3], paste, ")", sep="") | |
| 151 result$Description <- y | |
| 152 | |
| 153 ## restore the original order of GO Description | |
| 154 xx <- result[,c(2,3)] | |
| 155 xx <- unique(xx) | |
| 156 rownames(xx) <- xx[,1] | |
| 157 Termlevel <- xx[as.character(GOlevel[,1]),2] | |
| 158 | |
| 159 ##drop the *Total* column | |
| 160 result <- result[, colnames(result) != "Total"] | |
| 161 result$Description <- factor(result$Description, levels=rev(Termlevel)) | |
| 162 | |
| 163 } else if (by == "count") { | |
| 164 ## nothing | |
| 165 } else if (by == "geneRatio") { ##default | |
| 166 gsize <- as.numeric(sub("/\\d+$", "", as.character(result$GeneRatio))) | |
| 167 gcsize <- as.numeric(sub("^\\d+/", "", as.character(result$GeneRatio))) | |
| 168 result$GeneRatio = gsize/gcsize | |
| 169 cluster <- paste(as.character(result$Cluster),"\n", "(", gcsize, ")", sep="") | |
| 170 lv <- unique(cluster)[order(as.numeric(unique(result$Cluster)))] | |
| 171 result$Cluster <- factor(cluster, levels = lv) | |
| 172 } else { | |
| 173 ## nothing | |
| 174 } | |
| 175 return(result) | |
| 176 } | |
| 177 | |
| 178 ##function plotting.clusteProfile from clusterProfiler pkg | |
| 179 plotting.clusterProfile <- function(clProf.reshape.df,x = ~Cluster,type = "dot", colorBy = "p.adjust",by = "geneRatio",title="",font.size=12) { | |
| 180 | |
| 181 Description <- Percentage <- Count <- Cluster <- GeneRatio <- p.adjust <- pvalue <- NULL # to | |
| 182 if (type == "dot") { | |
| 183 if (by == "rowPercentage") { | |
| 184 p <- ggplot(clProf.reshape.df, | |
| 185 aes_(x = x, y = ~Description, size = ~Percentage)) | |
| 186 } else if (by == "count") { | |
| 187 p <- ggplot(clProf.reshape.df, | |
| 188 aes_(x = x, y = ~Description, size = ~Count)) | |
| 189 } else if (by == "geneRatio") { ##DEFAULT | |
| 190 p <- ggplot(clProf.reshape.df, | |
| 191 aes_(x = x, y = ~Description, size = ~GeneRatio)) | |
| 192 } else { | |
| 193 ## nothing here | |
| 194 } | |
| 195 if (any(colnames(clProf.reshape.df) == colorBy)) { | |
| 196 p <- p + | |
| 197 geom_point() + | |
| 198 aes_string(color=colorBy) + | |
| 199 scale_color_continuous(low="red", high="blue", guide=guide_colorbar(reverse=TRUE)) | |
| 200 ## scale_color_gradientn(guide=guide_colorbar(reverse=TRUE), colors = enrichplot:::sig_palette) | |
| 201 } else { | |
| 202 p <- p + geom_point(colour="steelblue") | |
| 203 } | |
| 204 } | |
| 205 | |
| 206 p <- p + xlab("") + ylab("") + ggtitle(title) + | |
| 207 theme_dose(font.size) | |
| 208 | |
| 209 ## theme(axis.text.x = element_text(colour="black", size=font.size, vjust = 1)) + | |
| 210 ## theme(axis.text.y = element_text(colour="black", | |
| 211 ## size=font.size, hjust = 1)) + | |
| 212 ## ggtitle(title)+theme_bw() | |
| 213 ## p <- p + theme(axis.text.x = element_text(angle=angle.axis.x, | |
| 214 ## hjust=hjust.axis.x, | |
| 215 ## vjust=vjust.axis.x)) | |
| 216 | |
| 217 return(p) | |
| 218 } | |
| 219 | |
| 98 make_dotplot<-function(res.cmp,ontology) { | 220 make_dotplot<-function(res.cmp,ontology) { |
| 99 | 221 |
| 100 res.cmp@compareClusterResult$Description <- sapply(as.vector(res.cmp@compareClusterResult$Description), function(x) {ifelse(nchar(x)>50, substr(x,1,50),x)},USE.NAMES = FALSE) | 222 dfok<-fortify.compareClusterResult(res.cmp) |
| 223 dfok$Description <- sapply(as.vector(dfok$Description), function(x) {ifelse(nchar(x)>50, substr(x,1,50),x)},USE.NAMES = FALSE) | |
| 224 p<-plotting.clusterProfile(dfok, title="") | |
| 225 | |
| 226 #plot(p, type="dot") # | |
| 101 output_path= paste("GO_profiles_comp_",ontology,".png",sep="") | 227 output_path= paste("GO_profiles_comp_",ontology,".png",sep="") |
| 102 png(output_path,height = 720, width = 600) | 228 png(output_path,height = 720, width = 600) |
| 103 p <- dotplot(res.cmp, showCategory=30) | 229 pl <- plot(p, type="dot") |
| 104 print(p) | 230 print(pl) |
| 105 dev.off() | 231 dev.off() |
| 106 } | 232 } |
| 107 | 233 |
| 108 get_cols <-function(input_cols) { | 234 get_cols <-function(input_cols) { |
| 109 input_cols <- gsub("c","",gsub("C","",gsub(" ","",input_cols))) | 235 input_cols <- gsub("c","",gsub("C","",gsub(" ","",input_cols))) |
| 110 if (grepl(":",input_cols)) { | 236 if (grepl(":",input_cols)) { |
| 142 main = function() { | 268 main = function() { |
| 143 | 269 |
| 144 #to get the args of the command line | 270 #to get the args of the command line |
| 145 args=get_args() | 271 args=get_args() |
| 146 | 272 |
| 147 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/GO_terms_profiles_comparison/args.rda") | 273 |
| 148 #load("/home/dchristiany/proteore_project/ProteoRE/tools/GO_terms_profiles_comparison/args.rda") | |
| 149 | |
| 150 ids1<-get_ids(args$inputtype1, args$input1, args$column1, args$header1) | 274 ids1<-get_ids(args$inputtype1, args$input1, args$column1, args$header1) |
| 151 ids2<-get_ids(args$inputtype2, args$input2, args$column2, args$header2) | 275 ids2<-get_ids(args$inputtype2, args$input2, args$column2, args$header2) |
| 152 ont = strsplit(args$ont, ",")[[1]] | 276 ont = strsplit(args$ont, ",")[[1]] |
| 153 lev=as.integer(args$lev) | 277 lev=as.integer(args$lev) |
| 154 org=args$org | 278 org=args$org |
