diff GO-enrich.R @ 4:18275725e7cf draft

planemo upload commit 9bdfcce89bdea8a0a85bfbf8f0fa9b943b17bea1-dirty
author proteore
date Mon, 17 Sep 2018 10:30:30 -0400
parents 67a796154e2a
children 36c586c918eb
line wrap: on
line diff
--- a/GO-enrich.R	Wed Sep 05 09:34:57 2018 -0400
+++ b/GO-enrich.R	Mon Sep 17 10:30:30 2018 -0400
@@ -1,7 +1,5 @@
 suppressMessages(library(clusterProfiler,quietly = TRUE))
 
-#library(org.Sc.sgd.db,quietly = TRUE)
-
 # Read file and return file content as data.frame
 readfile = function(filename, header) {
   if (header == "true") {
@@ -22,14 +20,29 @@
   return(file)
 }
 
+max_str_length_10_first <- function(vector){
+  vector <- as.vector(vector)
+  nb_description = length(vector)
+  if (nb_description >= 10){nb_description=10}
+  return(max(nchar(vector[1:nb_description])))
+}
+
+
 repartition.GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) {
   ggo<-groupGO(gene=geneid, 
                OrgDb = orgdb, 
                ont=ontology, 
                level=level, 
                readable=TRUE)
-  name <- paste("GGO.", ontology, ".png", sep = "")
-  png(name)
+  
+  if (max_str_length_10_first(ggo$Description) > 50 ){
+    width=720
+  } else {
+    width=600  
+  } 
+  
+  name <- paste("GGO_", ontology, "_bar-plot", sep = "")
+  png(name,height = 720, width = width)
   p <- barplot(ggo, showCategory=10)
   print(p)
   dev.off()
@@ -37,31 +50,42 @@
 }
 
 # GO over-representation test
-enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff) {
+enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff,plot) {
   ego<-enrichGO(gene=geneid,
                 universe=universe,
                 OrgDb=orgdb,
+                ont=ontology,
                 keytype="ENTREZID",
-                ont=ontology,
                 pAdjustMethod="BH",
                 pvalueCutoff=pval_cutoff,
                 qvalueCutoff=qval_cutoff,
                 readable=TRUE)
   
+  if (max_str_length_10_first(ego$Description) > 50 ){
+    width=800
+  } else {
+    width=600  
+  }
+  
   # Plot bar & dot plots
   #if there are enriched GopTerms
   if (length(ego$ID)>0){
-    bar_name <- paste("EGO.", ontology, ".bar.png", sep = "")
-    png(bar_name)
+    if ("dotplot" %in% plot ){
+    dot_name <- paste("EGO_", ontology, "_dot-plot", sep = "")
+    png(dot_name,height = 720, width = width)
+    p <- dotplot(ego, showCategory=10)
+    print(p)
+    dev.off()
+    }
+
+    if ("barplot" %in% plot ){
+    bar_name <- paste("EGO_", ontology, "_bar-plot", sep = "")
+    png(bar_name,height = 720, width = width)
     p <- barplot(ego)
     print(p)
     dev.off()
-    dot_name <- paste("EGO.", ontology, ".dot.png", sep = "")
-    png(dot_name)
-    p <- dotplot(ego, showCategory=10)
-    print(p)
-    dev.off()
     return(ego)
+    }
   } else {
     warning(paste("No Go terms enriched (EGO) found for ",ontology,"ontology"),immediate. = TRUE,noBreaks. = TRUE,call. = FALSE)
   }
@@ -69,7 +93,7 @@
 
 check_ids <- function(vector,type) {
   uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$"
-  entrez_id = "^'[0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$"
+  entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$"
   if (type == "entrez")
     return(grepl(entrez_id,vector))
   else if (type == "uniprot") {
@@ -103,7 +127,8 @@
         --level: 1-3
         --pval_cutoff
         --qval_cutoff
-        --text_output: text output filename \n")
+        --text_output: text output filename 
+        --plot : type of visualization, dotplot or/and barplot \n")
     q(save="no")
   }
   # Parse arguments
@@ -111,6 +136,7 @@
   argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
   args <- as.list(as.character(argsDF$V2))
   names(args) <- argsDF$V1
+  plot = unlist(strsplit(args$plot,","))
   #print(args)
   
   #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda")
@@ -123,7 +149,7 @@
     orgdb<-org.Hs.eg.db
   } else if (args$species=="org.Mm.eg.db") {
     orgdb<-org.Mm.eg.db
-  } else if (args$species=="org.Sc.eg.db") {
+  } else if (args$species=="org.Rn.eg.db") {
     orgdb<-org.Rn.eg.db
   }
 
@@ -214,17 +240,22 @@
     } else {
       universe_gene = NULL
     }
+  } else {
+    universe_gene = NULL
   }
 
   ##enrichGO : GO over-representation test
   for (onto in ontology) {
     if (args$go_represent == "true") {
       ggo<-repartition.GO(gene, orgdb, onto, level, readable=TRUE)
-      write.table(ggo, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
+      output_path = paste("cluster_profiler_GGO_",onto,".csv",sep="")
+      write.table(ggo, output_path, sep="\t", row.names = FALSE, quote=FALSE)
     }
-    if (args$go_enrich == "true" & !is.null(universe_gene)) {
-      ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff)
-      write.table(ego, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
+
+    if (args$go_enrich == "true") {
+      ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff,plot)
+      output_path = paste("cluster_profiler_EGO_",onto,".csv",sep="")
+      write.table(ego, output_path, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
     }
   }
 }