changeset 4:18275725e7cf draft

planemo upload commit 9bdfcce89bdea8a0a85bfbf8f0fa9b943b17bea1-dirty
author proteore
date Mon, 17 Sep 2018 10:30:30 -0400
parents 67a796154e2a
children 36c586c918eb
files GO-enrich.R cluster_profiler.xml
diffstat 2 files changed, 80 insertions(+), 43 deletions(-) [+]
line wrap: on
line diff
--- a/GO-enrich.R	Wed Sep 05 09:34:57 2018 -0400
+++ b/GO-enrich.R	Mon Sep 17 10:30:30 2018 -0400
@@ -1,7 +1,5 @@
 suppressMessages(library(clusterProfiler,quietly = TRUE))
 
-#library(org.Sc.sgd.db,quietly = TRUE)
-
 # Read file and return file content as data.frame
 readfile = function(filename, header) {
   if (header == "true") {
@@ -22,14 +20,29 @@
   return(file)
 }
 
+# Max nchar over the first 10 entries of `vector`; 0 for empty/NULL/all-NA input
+# (the former `vector[1:0]` indexing yielded NA there and broke `if (... > 50)`).
+max_str_length_10_first <- function(vector){
+  first_ten <- utils::head(as.vector(vector), 10)
+  return(max(0L, nchar(first_ten), na.rm = TRUE))
+}
+
+
 repartition.GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) {
   ggo<-groupGO(gene=geneid, 
                OrgDb = orgdb, 
                ont=ontology, 
                level=level, 
                readable=TRUE)
-  name <- paste("GGO.", ontology, ".png", sep = "")
-  png(name)
+  
+  if (max_str_length_10_first(ggo$Description) > 50 ){
+    width=720
+  } else {
+    width=600  
+  } 
+  
+  name <- paste("GGO_", ontology, "_bar-plot", sep = "")
+  png(name,height = 720, width = width)
   p <- barplot(ggo, showCategory=10)
   print(p)
   dev.off()
@@ -37,31 +50,42 @@
 }
 
 # GO over-representation test
-enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff) {
+enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff,plot) {
   ego<-enrichGO(gene=geneid,
                 universe=universe,
                 OrgDb=orgdb,
+                ont=ontology,
                 keytype="ENTREZID",
-                ont=ontology,
                 pAdjustMethod="BH",
                 pvalueCutoff=pval_cutoff,
                 qvalueCutoff=qval_cutoff,
                 readable=TRUE)
   
+  if (max_str_length_10_first(ego$Description) > 50 ){
+    width=800
+  } else {
+    width=600  
+  }
+  
   # Plot bar & dot plots
   #if there are enriched GopTerms
   if (length(ego$ID)>0){
-    bar_name <- paste("EGO.", ontology, ".bar.png", sep = "")
-    png(bar_name)
+    if ("dotplot" %in% plot ){
+    dot_name <- paste("EGO_", ontology, "_dot-plot", sep = "")
+    png(dot_name,height = 720, width = width)
+    p <- dotplot(ego, showCategory=10)
+    print(p)
+    dev.off()
+    }
+
+    if ("barplot" %in% plot ){
+    bar_name <- paste("EGO_", ontology, "_bar-plot", sep = "")
+    png(bar_name,height = 720, width = width)
     p <- barplot(ego)
     print(p)
     dev.off()
-    dot_name <- paste("EGO.", ontology, ".dot.png", sep = "")
-    png(dot_name)
-    p <- dotplot(ego, showCategory=10)
-    print(p)
-    dev.off()
     return(ego)
+    }
   } else {
     warning(paste("No Go terms enriched (EGO) found for ",ontology,"ontology"),immediate. = TRUE,noBreaks. = TRUE,call. = FALSE)
   }
@@ -69,7 +93,7 @@
 
 check_ids <- function(vector,type) {
   uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$"
-  entrez_id = "^'[0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$"
+  entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$"
   if (type == "entrez")
     return(grepl(entrez_id,vector))
   else if (type == "uniprot") {
@@ -103,7 +127,8 @@
         --level: 1-3
         --pval_cutoff
         --qval_cutoff
-        --text_output: text output filename \n")
+        --text_output: text output filename 
+        --plot : type of visualization, dotplot or/and barplot \n")
     q(save="no")
   }
   # Parse arguments
@@ -111,6 +136,7 @@
   argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
   args <- as.list(as.character(argsDF$V2))
   names(args) <- argsDF$V1
+  plot = unlist(strsplit(args$plot,","))
   #print(args)
   
   #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda")
@@ -123,7 +149,7 @@
     orgdb<-org.Hs.eg.db
   } else if (args$species=="org.Mm.eg.db") {
     orgdb<-org.Mm.eg.db
-  } else if (args$species=="org.Sc.eg.db") {
+  } else if (args$species=="org.Rn.eg.db") {
     orgdb<-org.Rn.eg.db
   }
 
@@ -214,17 +240,22 @@
     } else {
       universe_gene = NULL
     }
+  } else {
+    universe_gene = NULL
   }
 
   ##enrichGO : GO over-representation test
   for (onto in ontology) {
     if (args$go_represent == "true") {
       ggo<-repartition.GO(gene, orgdb, onto, level, readable=TRUE)
-      write.table(ggo, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
+      output_path = paste("cluster_profiler_GGO_",onto,".csv",sep="")
+      write.table(ggo, output_path, sep="\t", row.names = FALSE, quote=FALSE)
     }
-    if (args$go_enrich == "true" & !is.null(universe_gene)) {
-      ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff)
-      write.table(ego, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
+
+    if (args$go_enrich == "true") {
+      ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff,plot)
+      output_path = paste("cluster_profiler_EGO_",onto,".csv",sep="")
+      write.table(ego, output_path, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
     }
   }
 }
--- a/cluster_profiler.xml	Wed Sep 05 09:34:57 2018 -0400
+++ b/cluster_profiler.xml	Mon Sep 17 10:30:30 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="cluter_profiler" name="clusterProfiler" version="2018.09.05">
+<tool id="cluter_profiler" name="clusterProfiler" version="2018.09.17">
     <description>
     GO terms classification and enrichment analysis
     </description>
@@ -6,6 +6,7 @@
         <requirement type="package" version="3.4.1">R</requirement>
         <requirement type="package" version="3.5.0">bioconductor-org.hs.eg.db</requirement>
         <requirement type="package" version="3.5.0">bioconductor-org.mm.eg.db</requirement>
+        <requirement type="package" version="3.5.0">bioconductor-org.Rn.eg.db</requirement>
         <requirement type="package" version="3.2.0">bioconductor-dose</requirement>
         <requirement type="package" version="3.4.4">bioconductor-clusterprofiler</requirement>
     </requirements>
@@ -37,24 +38,23 @@
             --pval_cutoff="$ego.pval"
             --qval_cutoff="$ego.qval"
             #if $ego.universe.universe_option == "true"
-            #if $ego.universe.universe_input.universe_ids == "text"
-                --universe_type="text"
-                --universe="$ego.universe.universe_input.txt"
-            #else
-                --universe_type="file"
-                --universe="$ego.universe.universe_input.file"
-                --uncol="$ego.universe.universe_input.ncol"
-                --uheader="$ego.universe.universe_input.header"
+                #if $ego.universe.universe_input.universe_ids == "text"
+                    --universe_type="text"
+                    --universe="$ego.universe.universe_input.txt"
+                #else
+                    --universe_type="file"
+                    --universe="$ego.universe.universe_input.file"
+                    --uncol="$ego.universe.universe_input.ncol"
+                    --uheader="$ego.universe.universe_input.header"
+                #end if
+                --universe_id_type="$ego.universe.universe_idti.universe_idtypein"
             #end if
-            --universe_id_type="$ego.universe.universe_idti.universe_idtypein"
-        #end if
         #else
             --go_enrich="false"
         #end if
         
-        --onto_opt="$ontology"
-
-        --text_output="$text_output"
+        --plot="$plot"        
+        --onto_opt="$ontology" > $log
     ]]></command>
     <inputs>
         <conditional name="input" >
@@ -92,7 +92,7 @@
         <param name="species" type="select" label="Select a species" >
             <option value="org.Hs.eg.db">Human</option>
             <option value="org.Mm.eg.db">Mouse</option>
-            <!--option value="org.Sc.eg.db">Rat</option-->
+            <option value="org.Rn.eg.db">Rat</option>
         </param>
         <conditional name="ggo">
             <param name="go_represent" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Do you want to perform GO categories representation analysis?"/>
@@ -151,20 +151,26 @@
             <when value="false"/>
         </conditional>
 				
-		<param name="ontology" type="select" display="checkboxes" multiple="true" label="Please select GO terms category">
+		<param name="ontology" type="select" display="checkboxes" multiple="true" label="Please select GO terms category" optional="false">
             <option value="CC">Cellular Component</option>
             <option value="BP">Biological Process</option>
             <option value="MF">Molecular Function</option>
         </param>
 	    
-	    
+	    <param name="plot" type="select" display="checkboxes" multiple="true" label="Please select your visualization for enrichment analysis" optional="false">
+            <option selected = "true" value="dotplot">dot-plot</option>
+            <option value="barplot">bar-plot</option>
+        </param>
 	    
     </inputs>
     <outputs>
-        <data name="text_output" format="tabular" label="clusterProfiler text output" />
-        <collection type="list" label="clusterProfiler diagram outputs" name="output" >
-	    <discover_datasets pattern="(?P&lt;designation&gt;.+\.png)" ext="png" />
-	</collection>
+        <data name="log" format="tabular" label="log file" />
+        <collection type="list" label="clusterProfiler text files" name="text_output">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+\.csv)" ext="csv"/>
+        </collection>
+        <collection type="list" label="clusterProfiler diagram outputs" name="graph_output" >
+	        <discover_datasets pattern="(?P&lt;designation&gt;.+plot)" ext="png" />
+	    </collection>
     </outputs>
     <tests>
         <test>
@@ -186,8 +192,8 @@
                 <param name="go_enrich" value="false"/>
             </conditional>
             <param name="ontology" value="CC"/>
-            <output name="text_output" file="clusterProfiler_text_output.tabular"/>
-            <output_collection name="output">
+            <output name="log" />
+            <output_collection name="text_output">
                 <element name="GGO.CC.png" file="GGO.CC.png" ftype="png"/>
             </output_collection>
         </test>