# HG changeset patch # User proteore # Date 1537194630 14400 # Node ID 18275725e7cf45edebac731ed2a7abccfa1280d3 # Parent 67a796154e2a35b4c9cbe919de2c17dfd8e4309c planemo upload commit 9bdfcce89bdea8a0a85bfbf8f0fa9b943b17bea1-dirty diff -r 67a796154e2a -r 18275725e7cf GO-enrich.R --- a/GO-enrich.R Wed Sep 05 09:34:57 2018 -0400 +++ b/GO-enrich.R Mon Sep 17 10:30:30 2018 -0400 @@ -1,7 +1,5 @@ suppressMessages(library(clusterProfiler,quietly = TRUE)) -#library(org.Sc.sgd.db,quietly = TRUE) - # Read file and return file content as data.frame readfile = function(filename, header) { if (header == "true") { @@ -22,14 +20,29 @@ return(file) } +max_str_length_10_first <- function(vector){ + vector <- as.vector(vector) + nb_description = length(vector) + if (nb_description >= 10){nb_description=10} + return(max(nchar(vector[1:nb_description]))) +} + + repartition.GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) { ggo<-groupGO(gene=geneid, OrgDb = orgdb, ont=ontology, level=level, readable=TRUE) - name <- paste("GGO.", ontology, ".png", sep = "") - png(name) + + if (max_str_length_10_first(ggo$Description) > 50 ){ + width=720 + } else { + width=600 + } + + name <- paste("GGO_", ontology, "_bar-plot", sep = "") + png(name,height = 720, width = width) p <- barplot(ggo, showCategory=10) print(p) dev.off() @@ -37,31 +50,42 @@ } # GO over-representation test -enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff) { +enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff,plot) { ego<-enrichGO(gene=geneid, universe=universe, OrgDb=orgdb, + ont=ontology, keytype="ENTREZID", - ont=ontology, pAdjustMethod="BH", pvalueCutoff=pval_cutoff, qvalueCutoff=qval_cutoff, readable=TRUE) + if (max_str_length_10_first(ego$Description) > 50 ){ + width=800 + } else { + width=600 + } + # Plot bar & dot plots #if there are enriched GopTerms if (length(ego$ID)>0){ - bar_name <- paste("EGO.", ontology, ".bar.png", sep = "") - png(bar_name) + if ("dotplot" %in% plot ){ + dot_name <- paste("EGO_", ontology, "_dot-plot", sep = "") + png(dot_name,height = 720, width = width) + p <- dotplot(ego, showCategory=10) + print(p) + dev.off() + } + + if ("barplot" %in% plot ){ + bar_name <- paste("EGO_", ontology, "_bar-plot", sep = "") + png(bar_name,height = 720, width = width) p <- barplot(ego) print(p) dev.off() - dot_name <- paste("EGO.", ontology, ".dot.png", sep = "") - png(dot_name) - p <- dotplot(ego, showCategory=10) - print(p) - dev.off() return(ego) + } } else { warning(paste("No Go terms enriched (EGO) found for ",ontology,"ontology"),immediate. = TRUE,noBreaks. = TRUE,call. = FALSE) } @@ -69,7 +93,7 @@ check_ids <- function(vector,type) { uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$" - entrez_id = "^'[0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" + entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" if (type == "entrez") return(grepl(entrez_id,vector)) else if (type == "uniprot") { @@ -103,7 +127,8 @@ --level: 1-3 --pval_cutoff --qval_cutoff - --text_output: text output filename \n") + --text_output: text output filename + --plot : type of visualization, dotplot or/and barplot \n") q(save="no") } # Parse arguments @@ -111,6 +136,7 @@ argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) args <- as.list(as.character(argsDF$V2)) names(args) <- argsDF$V1 + plot = unlist(strsplit(args$plot,",")) #print(args) #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda") @@ -123,7 +149,7 @@ orgdb<-org.Hs.eg.db } else if (args$species=="org.Mm.eg.db") { orgdb<-org.Mm.eg.db - } else if (args$species=="org.Sc.eg.db") { + } else if (args$species=="org.Rn.eg.db") { orgdb<-org.Rn.eg.db } @@ -214,17 +240,22 @@ } else { universe_gene = NULL } + } else { + universe_gene = NULL } ##enrichGO : GO over-representation test for (onto in ontology) { if (args$go_represent == "true") { ggo<-repartition.GO(gene, orgdb, onto, level, readable=TRUE) - write.table(ggo, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) + output_path = paste("cluster_profiler_GGO_",onto,".csv",sep="") + write.table(ggo, output_path, sep="\t", row.names = FALSE, quote=FALSE) } - if (args$go_enrich == "true" & !is.null(universe_gene)) { - ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff) - write.table(ego, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) + + if (args$go_enrich == "true") { + ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff,plot) + output_path = paste("cluster_profiler_EGO_",onto,".csv",sep="") + write.table(ego, output_path, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) } } } diff -r 67a796154e2a -r 18275725e7cf cluster_profiler.xml --- a/cluster_profiler.xml Wed Sep 05 09:34:57 2018 -0400 +++ b/cluster_profiler.xml Mon Sep 17 10:30:30 2018 -0400 @@ -1,4 +1,4 @@ - + GO terms classification and enrichment analysis @@ -6,6 +6,7 @@ R bioconductor-org.hs.eg.db bioconductor-org.mm.eg.db + bioconductor-org.Rn.eg.db bioconductor-dose bioconductor-clusterprofiler @@ -37,24 +38,23 @@ --pval_cutoff="$ego.pval" --qval_cutoff="$ego.qval" #if $ego.universe.universe_option == "true" - #if $ego.universe.universe_input.universe_ids == "text" - --universe_type="text" - --universe="$ego.universe.universe_input.txt" - #else - --universe_type="file" - --universe="$ego.universe.universe_input.file" - --uncol="$ego.universe.universe_input.ncol" - --uheader="$ego.universe.universe_input.header" + #if $ego.universe.universe_input.universe_ids == "text" + --universe_type="text" + --universe="$ego.universe.universe_input.txt" + #else + --universe_type="file" + --universe="$ego.universe.universe_input.file" + --uncol="$ego.universe.universe_input.ncol" + --uheader="$ego.universe.universe_input.header" + #end if + --universe_id_type="$ego.universe.universe_idti.universe_idtypein" #end if - --universe_id_type="$ego.universe.universe_idti.universe_idtypein" - #end if #else --go_enrich="false" #end if - --onto_opt="$ontology" - - --text_output="$text_output" + --plot="$plot" + --onto_opt="$ontology" > $log ]]> @@ -92,7 +92,7 @@ - + @@ -151,20 +151,26 @@ - + - + + + + - - - - + + + + + + + @@ -186,8 +192,8 @@ - - + +