Mercurial > repos > proteore > proteore_clusterprofiler
changeset 4:18275725e7cf draft
planemo upload commit 9bdfcce89bdea8a0a85bfbf8f0fa9b943b17bea1-dirty
| author | proteore |
|---|---|
| date | Mon, 17 Sep 2018 10:30:30 -0400 |
| parents | 67a796154e2a |
| children | 36c586c918eb |
| files | GO-enrich.R cluster_profiler.xml |
| diffstat | 2 files changed, 80 insertions(+), 43 deletions(-) [+] |
line wrap: on
line diff
--- a/GO-enrich.R Wed Sep 05 09:34:57 2018 -0400 +++ b/GO-enrich.R Mon Sep 17 10:30:30 2018 -0400 @@ -1,7 +1,5 @@ suppressMessages(library(clusterProfiler,quietly = TRUE)) -#library(org.Sc.sgd.db,quietly = TRUE) - # Read file and return file content as data.frame readfile = function(filename, header) { if (header == "true") { @@ -22,14 +20,29 @@ return(file) } +max_str_length_10_first <- function(vector){ + vector <- as.vector(vector) + nb_description = length(vector) + if (nb_description >= 10){nb_description=10} + return(max(nchar(vector[1:nb_description]))) +} + + repartition.GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) { ggo<-groupGO(gene=geneid, OrgDb = orgdb, ont=ontology, level=level, readable=TRUE) - name <- paste("GGO.", ontology, ".png", sep = "") - png(name) + + if (max_str_length_10_first(ggo$Description) > 50 ){ + width=720 + } else { + width=600 + } + + name <- paste("GGO_", ontology, "_bar-plot", sep = "") + png(name,height = 720, width = width) p <- barplot(ggo, showCategory=10) print(p) dev.off() @@ -37,31 +50,42 @@ } # GO over-representation test -enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff) { +enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff,plot) { ego<-enrichGO(gene=geneid, universe=universe, OrgDb=orgdb, + ont=ontology, keytype="ENTREZID", - ont=ontology, pAdjustMethod="BH", pvalueCutoff=pval_cutoff, qvalueCutoff=qval_cutoff, readable=TRUE) + if (max_str_length_10_first(ego$Description) > 50 ){ + width=800 + } else { + width=600 + } + # Plot bar & dot plots #if there are enriched GopTerms if (length(ego$ID)>0){ - bar_name <- paste("EGO.", ontology, ".bar.png", sep = "") - png(bar_name) + if ("dotplot" %in% plot ){ + dot_name <- paste("EGO_", ontology, "_dot-plot", sep = "") + png(dot_name,height = 720, width = width) + p <- dotplot(ego, showCategory=10) + print(p) + dev.off() + } + + if ("barplot" %in% plot ){ + bar_name <- paste("EGO_", ontology, "_bar-plot", sep = "") + png(bar_name,height = 720, width = width) p <- barplot(ego) print(p) dev.off() - dot_name <- paste("EGO.", ontology, ".dot.png", sep = "") - png(dot_name) - p <- dotplot(ego, showCategory=10) - print(p) - dev.off() return(ego) + } } else { warning(paste("No Go terms enriched (EGO) found for ",ontology,"ontology"),immediate. = TRUE,noBreaks. = TRUE,call. = FALSE) } @@ -69,7 +93,7 @@ check_ids <- function(vector,type) { uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$" - entrez_id = "^'[0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" + entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" if (type == "entrez") return(grepl(entrez_id,vector)) else if (type == "uniprot") { @@ -103,7 +127,8 @@ --level: 1-3 --pval_cutoff --qval_cutoff - --text_output: text output filename \n") + --text_output: text output filename + --plot : type of visualization, dotplot or/and barplot \n") q(save="no") } # Parse arguments @@ -111,6 +136,7 @@ argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) args <- as.list(as.character(argsDF$V2)) names(args) <- argsDF$V1 + plot = unlist(strsplit(args$plot,",")) #print(args) #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda") @@ -123,7 +149,7 @@ orgdb<-org.Hs.eg.db } else if (args$species=="org.Mm.eg.db") { orgdb<-org.Mm.eg.db - } else if (args$species=="org.Sc.eg.db") { + } else if (args$species=="org.Rn.eg.db") { orgdb<-org.Rn.eg.db } @@ -214,17 +240,22 @@ } else { universe_gene = NULL } + } else { + universe_gene = NULL } ##enrichGO : GO over-representation test for (onto in ontology) { if (args$go_represent == "true") { ggo<-repartition.GO(gene, orgdb, onto, level, readable=TRUE) - write.table(ggo, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) + output_path = paste("cluster_profiler_GGO_",onto,".csv",sep="") + write.table(ggo, output_path, sep="\t", row.names = FALSE, quote=FALSE) } - if (args$go_enrich == "true" & !is.null(universe_gene)) { - ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff) - write.table(ego, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) + + if (args$go_enrich == "true") { + ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff,plot) + output_path = paste("cluster_profiler_EGO_",onto,".csv",sep="") + write.table(ego, output_path, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) } } }
--- a/cluster_profiler.xml Wed Sep 05 09:34:57 2018 -0400 +++ b/cluster_profiler.xml Mon Sep 17 10:30:30 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="cluter_profiler" name="clusterProfiler" version="2018.09.05"> +<tool id="cluter_profiler" name="clusterProfiler" version="2018.09.17"> <description> GO terms classification and enrichment analysis </description> @@ -6,6 +6,7 @@ <requirement type="package" version="3.4.1">R</requirement> <requirement type="package" version="3.5.0">bioconductor-org.hs.eg.db</requirement> <requirement type="package" version="3.5.0">bioconductor-org.mm.eg.db</requirement> + <requirement type="package" version="3.5.0">bioconductor-org.Rn.eg.db</requirement> <requirement type="package" version="3.2.0">bioconductor-dose</requirement> <requirement type="package" version="3.4.4">bioconductor-clusterprofiler</requirement> </requirements> @@ -37,24 +38,23 @@ --pval_cutoff="$ego.pval" --qval_cutoff="$ego.qval" #if $ego.universe.universe_option == "true" - #if $ego.universe.universe_input.universe_ids == "text" - --universe_type="text" - --universe="$ego.universe.universe_input.txt" - #else - --universe_type="file" - --universe="$ego.universe.universe_input.file" - --uncol="$ego.universe.universe_input.ncol" - --uheader="$ego.universe.universe_input.header" + #if $ego.universe.universe_input.universe_ids == "text" + --universe_type="text" + --universe="$ego.universe.universe_input.txt" + #else + --universe_type="file" + --universe="$ego.universe.universe_input.file" + --uncol="$ego.universe.universe_input.ncol" + --uheader="$ego.universe.universe_input.header" + #end if + --universe_id_type="$ego.universe.universe_idti.universe_idtypein" #end if - --universe_id_type="$ego.universe.universe_idti.universe_idtypein" - #end if #else --go_enrich="false" #end if - --onto_opt="$ontology" - - --text_output="$text_output" + --plot="$plot" + --onto_opt="$ontology" > $log ]]></command> <inputs> <conditional name="input" > @@ -92,7 +92,7 @@ <param name="species" type="select" label="Select a species" > <option value="org.Hs.eg.db">Human</option> <option value="org.Mm.eg.db">Mouse</option> - <!--option value="org.Sc.eg.db">Rat</option--> + <option value="org.Rn.eg.db">Rat</option> </param> <conditional name="ggo"> <param name="go_represent" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Do you want to perform GO categories representation analysis?"/> @@ -151,20 +151,26 @@ <when value="false"/> </conditional> - <param name="ontology" type="select" display="checkboxes" multiple="true" label="Please select GO terms category"> + <param name="ontology" type="select" display="checkboxes" multiple="true" label="Please select GO terms category" optional="false"> <option value="CC">Cellular Component</option> <option value="BP">Biological Process</option> <option value="MF">Molecular Function</option> </param> - + <param name="plot" type="select" display="checkboxes" multiple="true" label="Please select your visualization for enrichment analysis" optional="false"> + <option selected = "true" value="dotplot">dot-plot</option> + <option value="barplot">bar-plot</option> + </param> </inputs> <outputs> - <data name="text_output" format="tabular" label="clusterProfiler text output" /> - <collection type="list" label="clusterProfiler diagram outputs" name="output" > - <discover_datasets pattern="(?P<designation>.+\.png)" ext="png" /> - </collection> + <data name="log" format="tabular" label="log file" /> + <collection type="list" label="clusterProfiler text files" name="text_output"> + <discover_datasets pattern="(?P<designation>.+\.csv)" ext="csv"/> + </collection> + <collection type="list" label="clusterProfiler diagram outputs" name="graph_output" > + <discover_datasets pattern="(?P<designation>.+plot)" ext="png" /> + </collection> </outputs> <tests> <test> @@ -186,8 +192,8 @@ <param name="go_enrich" value="false"/> </conditional> <param name="ontology" value="CC"/> - <output name="text_output" file="clusterProfiler_text_output.tabular"/> - <output_collection name="output"> + <output name="log" /> + <output_collection name="text_output"> <element name="GGO.CC.png" file="GGO.CC.png" ftype="png"/> </output_collection> </test>
