Mercurial > repos > proteore > proteore_go_terms_profiles_comparison
comparison GO_prof_comp.R @ 0:a0ea9e00bd66 draft
planemo upload commit e5e4a3efd1cb1498724c0e93f8ec9606d0ed3045-dirty
| author | proteore |
|---|---|
| date | Thu, 28 Feb 2019 05:53:47 -0500 |
| parents | |
| children | 67a4f68f1c1c |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a0ea9e00bd66 |
|---|---|
| 1 options(warn=-1) #TURN OFF WARNINGS !!!!!! | |
| 2 suppressMessages(library(clusterProfiler,quietly = TRUE)) | |
| 3 | |
| 4 #return the number of character from the longest description found (from the 10 first) | |
| 5 max_str_length_10_first <- function(vector){ | |
| 6 vector <- as.vector(vector) | |
| 7 nb_description = length(vector) | |
| 8 if (nb_description >= 10){nb_description=10} | |
| 9 return(max(nchar(vector[1:nb_description]))) | |
| 10 } | |
| 11 | |
| 12 str2bool <- function(x){ | |
| 13 if (any(is.element(c("t","true"),tolower(x)))){ | |
| 14 return (TRUE) | |
| 15 }else if (any(is.element(c("f","false"),tolower(x)))){ | |
| 16 return (FALSE) | |
| 17 }else{ | |
| 18 return(NULL) | |
| 19 } | |
| 20 } | |
| 21 | |
| 22 get_args <- function(){ | |
| 23 | |
| 24 ## Collect arguments | |
| 25 args <- commandArgs(TRUE) | |
| 26 | |
| 27 ## Default setting when no arguments passed | |
| 28 if(length(args) < 1) { | |
| 29 args <- c("--help") | |
| 30 } | |
| 31 | |
| 32 ## Help section | |
| 33 if("--help" %in% args) { | |
| 34 cat("Selection and Annotation HPA | |
| 35 Arguments: | |
| 36 --inputtype1: type of input (list of id or filename) | |
| 37 --inputtype2: type of input (list of id or filename) | |
| 38 --input1: input1 | |
| 39 --input2: input2 | |
| 40 --column1: the column number which you would like to apply... | |
| 41 --column2: the column number which you would like to apply... | |
| 42 --header1: true/false if your file contains a header | |
| 43 --header2: true/false if your file contains a header | |
| 44 --ont: ontology to use | |
| 45 --lev: ontology level | |
| 46 --org: organism db package | |
| 47 --list_name1: name of the first list | |
| 48 --list_name2: name of the second list \n") | |
| 49 | |
| 50 q(save="no") | |
| 51 } | |
| 52 | |
| 53 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") | |
| 54 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) | |
| 55 args <- as.list(as.character(argsDF$V2)) | |
| 56 names(args) <- argsDF$V1 | |
| 57 | |
| 58 return(args) | |
| 59 } | |
| 60 | |
| 61 get_ids=function(inputtype, input, ncol, header) { | |
| 62 | |
| 63 if (inputtype == "text") { | |
| 64 ids = strsplit(input, "[ \t\n]+")[[1]] | |
| 65 } else if (inputtype == "file") { | |
| 66 header=str2bool(header) | |
| 67 ncol=get_cols(ncol) | |
| 68 csv = read.csv(input,header=header, sep="\t", as.is=T) | |
| 69 ids=csv[,ncol] | |
| 70 } | |
| 71 | |
| 72 ids = unlist(strsplit(ids,";")) | |
| 73 ids = ids[which(!is.na(ids))] | |
| 74 | |
| 75 return(ids) | |
| 76 } | |
| 77 | |
| 78 str2bool <- function(x){ | |
| 79 if (any(is.element(c("t","true"),tolower(x)))){ | |
| 80 return (TRUE) | |
| 81 }else if (any(is.element(c("f","false"),tolower(x)))){ | |
| 82 return (FALSE) | |
| 83 }else{ | |
| 84 return(NULL) | |
| 85 } | |
| 86 } | |
| 87 | |
| 88 check_ids <- function(vector,type) { | |
| 89 uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$" | |
| 90 entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" | |
| 91 if (type == "entrez") | |
| 92 return(grepl(entrez_id,vector)) | |
| 93 else if (type == "uniprot") { | |
| 94 return(grepl(uniprot_pattern,vector)) | |
| 95 } | |
| 96 } | |
| 97 | |
| 98 make_dotplot<-function(res.cmp,ontology) { | |
| 99 | |
| 100 res.cmp@compareClusterResult$Description <- sapply(as.vector(res.cmp@compareClusterResult$Description), function(x) {ifelse(nchar(x)>50, substr(x,1,50),x)},USE.NAMES = FALSE) | |
| 101 output_path= paste("GO_profiles_comp_",ontology,".png",sep="") | |
| 102 png(output_path,height = 720, width = 600) | |
| 103 p <- dotplot(res.cmp, showCategory=30) | |
| 104 print(p) | |
| 105 dev.off() | |
| 106 } | |
| 107 | |
| 108 get_cols <-function(input_cols) { | |
| 109 input_cols <- gsub("c","",gsub("C","",gsub(" ","",input_cols))) | |
| 110 if (grepl(":",input_cols)) { | |
| 111 first_col=unlist(strsplit(input_cols,":"))[1] | |
| 112 last_col=unlist(strsplit(input_cols,":"))[2] | |
| 113 cols=first_col:last_col | |
| 114 } else { | |
| 115 cols = as.integer(unlist(strsplit(input_cols,","))) | |
| 116 } | |
| 117 return(cols) | |
| 118 } | |
| 119 | |
| 120 #to check | |
| 121 cmp.GO <- function(l,fun="groupGO",orgdb, ontology, level=3, readable=TRUE) { | |
| 122 cmpGO<-compareCluster(geneClusters = l, | |
| 123 fun=fun, | |
| 124 OrgDb = orgdb, | |
| 125 ont=ontology, | |
| 126 level=level, | |
| 127 readable=TRUE) | |
| 128 | |
| 129 return(cmpGO) | |
| 130 } | |
| 131 | |
| 132 check_ids <- function(vector,type) { | |
| 133 uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$" | |
| 134 entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" | |
| 135 if (type == "entrez") | |
| 136 return(grepl(entrez_id,vector)) | |
| 137 else if (type == "uniprot") { | |
| 138 return(grepl(uniprot_pattern,vector)) | |
| 139 } | |
| 140 } | |
| 141 | |
| 142 main = function() { | |
| 143 | |
| 144 #to get the args of the command line | |
| 145 args=get_args() | |
| 146 | |
| 147 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/GO_terms_profiles_comparison/args.rda") | |
| 148 #load("/home/dchristiany/proteore_project/ProteoRE/tools/GO_terms_profiles_comparison/args.rda") | |
| 149 | |
| 150 ids1<-get_ids(args$inputtype1, args$input1, args$column1, args$header1) | |
| 151 ids2<-get_ids(args$inputtype2, args$input2, args$column2, args$header2) | |
| 152 ont = strsplit(args$ont, ",")[[1]] | |
| 153 lev=as.integer(args$lev) | |
| 154 org=args$org | |
| 155 | |
| 156 #load annot package | |
| 157 suppressMessages(library(args$org, character.only = TRUE, quietly = TRUE)) | |
| 158 | |
| 159 # Extract OrgDb | |
| 160 if (args$org=="org.Hs.eg.db") { | |
| 161 orgdb<-org.Hs.eg.db | |
| 162 } else if (args$org=="org.Mm.eg.db") { | |
| 163 orgdb<-org.Mm.eg.db | |
| 164 } else if (args$org=="org.Rn.eg.db") { | |
| 165 orgdb<-org.Rn.eg.db | |
| 166 } | |
| 167 | |
| 168 for(ontology in ont) { | |
| 169 liste = list("l1"=ids1,"l2"=ids2) | |
| 170 names(liste) = c(args$list_name1,args$list_name2) | |
| 171 res.cmp<-cmp.GO(l=liste,fun="groupGO",orgdb, ontology, level=lev, readable=TRUE) | |
| 172 make_dotplot(res.cmp,ontology) | |
| 173 output_path = paste("GO_profiles_comp_",ontology,".tsv",sep="") | |
| 174 write.table(res.cmp@compareClusterResult, output_path, sep="\t", row.names=F, quote=F) | |
| 175 } | |
| 176 | |
| 177 } #end main | |
| 178 | |
| 179 main() | |
| 180 |
