comparison kegg_pathways_visualization.R @ 15:da82872f5c80 draft

planemo upload commit 63302cb49d4f0f4dbc9ae141d20704822588f54e-dirty
author proteore
date Mon, 12 Nov 2018 10:54:25 -0500
parents 9fe4a861601b
children
comparison
equal deleted inserted replaced
14:be8115c73b0d 15:da82872f5c80
62 x <- substr(x,4,nchar(x)) 62 x <- substr(x,4,nchar(x))
63 } 63 }
64 return(x) 64 return(x)
65 } 65 }
66 66
67 kegg_to_geneID <- function(vector){
68 vector <- sapply(vector, function(x) unlist(strsplit(x,":"))[2],USE.NAMES = F)
69 return (vector)
70 }
71
67 clean_bad_character <- function(string) { 72 clean_bad_character <- function(string) {
68 string <- gsub("X","",string) 73 string <- gsub("X","",string)
69 return(string) 74 return(string)
70 } 75 }
71 76
77 } 82 }
78 83
79 #return a summary from the mapping with pathview in a vector 84 #return a summary from the mapping with pathview in a vector
80 mapping_summary <- function(pv.out,species,id,id_type){ 85 mapping_summary <- function(pv.out,species,id,id_type){
81 86
82 mapped <- pv.out$plot.data.gene$kegg.names[which(pv.out$plot.data.gene$all.mapped!='')] 87 mapped <- unique(pv.out$plot.data.gene$kegg.names[which(pv.out$plot.data.gene$all.mapped!='')])
83 nb_mapped <- length(mapped) 88 nb_mapped <- length(mapped)
84 nb_kegg_id <- length(unique(pv.out$plot.data.gene$kegg.names)) 89 nb_kegg_id <- length(unique(pv.out$plot.data.gene$kegg.names))
85 ratio = round((nb_mapped/nb_kegg_id)*100, 2) 90 ratio = round((nb_mapped/nb_kegg_id)*100, 2)
86 if (is.nan(ratio)) { ratio = ""} 91 if (is.nan(ratio)) { ratio = ""}
87 pathway_id = paste(species,id,sep="") 92 pathway_id = paste(species,id,sep="")
88 pathway_name = as.character(pathways_list[pathways_list[,1]==pathway_id,][2]) 93 pathway_name = as.character(pathways_list[pathways_list[,1]==pathway_id,][2])
89 94
90 if (id_type=="geneid"){ 95 if (id_type=="geneid" || id_type=="keggid") {
91 row <- c(pathway_id,pathway_name,length(unique(geneID)),nb_kegg_id,nb_mapped,ratio,paste(mapped,collapse=";")) 96 row <- c(pathway_id,pathway_name,length(unique(geneID)),nb_kegg_id,nb_mapped,ratio,paste(mapped,collapse=";"))
92 names(row) <- c("KEGG pathway ID","pathway name","nb of Entrez gene ID used","nb of Entrez gene ID mapped", 97 names(row) <- c("KEGG pathway ID","pathway name","nb of Entrez gene ID used","nb of Entrez gene ID mapped",
93 "nb of Entrez gene ID in the pathway", "ratio of Entrez gene ID mapped (%)","Entrez gene ID mapped") 98 "nb of Entrez gene ID in the pathway", "ratio of Entrez gene ID mapped (%)","Entrez gene ID mapped")
94 }else if (id_type=="uniprotid"){ 99 } else if (id_type=="uniprotid") {
95 row <- c(pathway_id,pathway_name,length(unique(uniprotID)),length(unique(geneID)),nb_mapped,nb_kegg_id,ratio,paste(mapped,collapse=";"),paste(mapped2geneID[which(mapped2geneID[,2] %in% mapped)],collapse=";")) 100 row <- c(pathway_id,pathway_name,length(unique(uniprotID)),length(unique(geneID)),nb_mapped,nb_kegg_id,ratio,paste(mapped,collapse=";"),paste(mapped2geneID[which(mapped2geneID[,2] %in% mapped)],collapse=";"))
96 names(row) <- c("KEGG pathway ID","pathway name","nb of Uniprot_AC used","nb of Entrez gene ID used","nb of Entrez gene ID mapped", 101 names(row) <- c("KEGG pathway ID","pathway name","nb of Uniprot_AC used","nb of Entrez gene ID used","nb of Entrez gene ID mapped",
97 "nb of Entrez gene ID in the pathway", "ratio of Entrez gene ID mapped (%)","Entrez gene ID mapped","uniprot_AC mapped") 102 "nb of Entrez gene ID in the pathway", "ratio of Entrez gene ID mapped (%)","Entrez gene ID mapped","uniprot_AC mapped")
98 } 103 }
99 return(row) 104 return(row)
192 colnames(tab)[fc_col] <- paste("e",i,sep='') 197 colnames(tab)[fc_col] <- paste("e",i,sep='')
193 tab[,fc_col] <- as.double(gsub(",",".",as.character(tab[,fc_col]) )) 198 tab[,fc_col] <- as.double(gsub(",",".",as.character(tab[,fc_col]) ))
194 } 199 }
195 } 200 }
196 201
197 ##### map uniprotID to entrez geneID 202 ##### map uniprotID to entrez geneID and kegg to geneID
198 if (id_type == "uniprotid") { 203 if (id_type == "uniprotid") {
199 uniprotID = tab[,ncol] 204 uniprotID = tab[,ncol]
200 mapped2geneID = id2eg(ids = uniprotID, category = "uniprot", org = org[[species]], pkg.name = NULL) 205 mapped2geneID = id2eg(ids = uniprotID, category = "uniprot", org = org[[species]], pkg.name = NULL)
201 geneID = mapped2geneID[,2] 206 geneID = mapped2geneID[,2]
207 tab = cbind(tab,geneID)
208 }else if (id_type == "keggid"){
209 keggID = tab[,ncol]
210 geneID = kegg_to_geneID(keggID)
202 tab = cbind(tab,geneID) 211 tab = cbind(tab,geneID)
203 }else if (id_type == "geneid"){ 212 }else if (id_type == "geneid"){
204 colnames(tab)[ncol] <- "geneID" 213 colnames(tab)[ncol] <- "geneID"
205 } 214 }
206 215