Mercurial > repos > proteore > proteore_clusterprofiler
changeset 23:879367568ea6 draft
planemo upload commit 4dbf0d227157012f8017e0e5f44e2821b0136a4c-dirty
| author | proteore |
|---|---|
| date | Fri, 05 Oct 2018 09:16:47 -0400 |
| parents | 6dafa28110f5 |
| children | 267ee3760721 |
| files | GO-enrich.R cluster_profiler.xml |
| diffstat | 2 files changed, 31 insertions(+), 26 deletions(-) [+] |
line wrap: on
line diff
--- a/GO-enrich.R Thu Oct 04 09:25:39 2018 -0400 +++ b/GO-enrich.R Fri Oct 05 09:16:47 2018 -0400 @@ -1,23 +1,15 @@ +options(warn=-1) #TURN OFF WARNINGS !!!!!! suppressMessages(library(clusterProfiler,quietly = TRUE)) # Read file and return file content as data.frame -readfile = function(filename, header) { - if (header == "true") { - # Read only first line of the file as header: - headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") - #Read the data of the files (skipping the first row) - file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") - # Remove empty rows +read_file <- function(path,header){ + file <- try(read.table(path,header=header, sep="\t",stringsAsFactors = FALSE, quote=""),silent=TRUE) + if (inherits(file,"try-error")){ + stop("File not found !") + }else{ file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] - #And assign the header to the data - names(file) <- headers + return(file) } - else { - file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") - # Remove empty rows - file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] - } - return(file) } #return the number of character from the longest description found (from the 10 first) @@ -28,6 +20,16 @@ return(max(nchar(vector[1:nb_description]))) } +str2bool <- function(x){ + if (any(is.element(c("t","true"),tolower(x)))){ + return (TRUE) + }else if (any(is.element(c("f","false"),tolower(x)))){ + return (FALSE) + }else{ + return(NULL) + } +} + #used before the limit was set to 50 characters width_by_max_char <- function (nb_max_char) { if (nb_max_char < 50 ){ @@ -167,6 +169,8 @@ # Extract input IDs input_type = args$input_type + id_type = args$id_type + if (input_type == "text") { input = strsplit(args$input, "[ \t\n]+")[[1]] } else if (input_type == "file") { @@ -178,13 +182,12 @@ } else { ncol = as.numeric(gsub("c", "", ncol)) } - header = args$header - # Get file content - file = readfile(filename, header) - # Extract Protein IDs list + header = str2bool(args$header) # Get file content + file = read_file(filename, header) # Extract Protein IDs list input = unlist(sapply(as.character(file[,ncol]),function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE)) } - id_type = args$id_type + + ## Get input gene list from input IDs #ID format Conversion #This case : from UNIPROT (protein id) to ENTREZ (gene id) @@ -203,11 +206,13 @@ } ontology <- strsplit(args$onto_opt, ",")[[1]] + ## Extract GGO/EGO arguments if (args$go_represent == "true") { go_represent <- args$go_represent level <- as.numeric(args$level) } + if (args$go_enrich == "true") { go_enrich <- args$go_enrich pval_cutoff <- as.numeric(args$pval_cutoff) @@ -226,9 +231,9 @@ } else { universe_ncol = as.numeric(gsub("c", "", universe_ncol)) } - universe_header = args$uheader + universe_header = str2bool(args$uheader) # Get file content - universe_file = readfile(universe_filename, universe_header) + universe_file = read_file(universe_filename, universe_header) # Extract Protein IDs list universe <- sapply(universe_file[,universe_ncol], function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE) } @@ -240,7 +245,7 @@ suppressMessages(universe_gene<-bitr(universe, fromType=idFrom, toType=idTo, OrgDb=orgdb)) universe_gene<-unique(universe_gene$ENTREZID) } else if (universe_id_type=="Entrez" & any(check_ids(universe,"entrez"))) { - universe_gene<-unique(universe) + universe_gene<-unique(unlist(universe)) } else { if (universe_type=="text"){ print(paste(universe_id_type,"not found in your background IDs list",sep=" ")) @@ -261,13 +266,13 @@ if (args$go_represent == "true") { ggo<-repartition.GO(gene, orgdb, onto, level, readable=TRUE) output_path = paste("cluster_profiler_GGO_",onto,".tsv",sep="") - write.table(ggo, output_path, sep="\t", row.names = FALSE, quote=FALSE) + write.table(ggo, output_path, sep="\t", row.names = FALSE, quote = FALSE ) } if (args$go_enrich == "true") { ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff,plot) output_path = paste("cluster_profiler_EGO_",onto,".tsv",sep="") - write.table(ego, output_path, sep="\t", row.names = FALSE, quote=FALSE) + write.table(ego, output_path, sep="\t", row.names = FALSE, quote = FALSE ) } } }
--- a/cluster_profiler.xml Thu Oct 04 09:25:39 2018 -0400 +++ b/cluster_profiler.xml Fri Oct 05 09:16:47 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="cluter_profiler" name="clusterProfiler" version="2018.10.04"> +<tool id="cluter_profiler" name="clusterProfiler" version="2018.10.05"> <description> GO terms classification and enrichment analysis </description>
