comparison kegg_maps_visualization.R @ 12:10c572ad2b9e draft default tip

"planemo upload commit 209e9e48505e55a2a2b4b1d7b053f557344bc2e8-dirty"
author proteore
date Mon, 10 May 2021 12:45:44 +0000
parents 7d04efde2526
children
comparison
equal deleted inserted replaced
11:7d04efde2526 12:10c572ad2b9e
1 #!/usr/bin/Rscript 1 #!/usr/bin/Rscript
2 #Rscript made for mapping genesID on KEGG pathway with Pathview package 2 #Rscript made for mapping genesID on KEGG pathway with Pathview package
3 #input : csv file containing ids (uniprot or geneID) to map, plus parameters 3 #input : csv file containing ids (uniprot or geneid) to map, plus parameters
4 #output : KEGG pathway : jpeg or pdf file. 4 #output : KEGG pathway : jpeg or pdf file.
5 5
6 options(warn=-1) #TURN OFF WARNINGS !!!!!! 6 options(warn = -1) #TURN OFF WARNINGS !!!!!!
7 suppressMessages(library("pathview")) 7 suppressMessages(library("pathview"))
8 suppressMessages(library(KEGGREST)) 8 suppressMessages(library(KEGGREST))
9 9
10 read_file <- function(path,header){ 10 read_file <- function(path, header) {
11 file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F, comment.char = ""),silent=TRUE) 11 file <- try(read.csv(path, header = header, sep = "\t",
12 if (inherits(file,"try-error")){ 12 stringsAsFactors = FALSE, quote = "\"", check.names = F,
13 stop("Read file error ! Please check your file (header, # character, etc) ") 13 comment.char = ""), silent = TRUE)
14 }else{ 14 if (inherits(file, "try-error")) {
15 stop("Read file error! Please check your file (header, # character, etc)")
16 }else {
15 return(file) 17 return(file)
16 } 18 }
17 } 19 }
18 20
19 ##### fuction to clean and concatenate pathway name (allow more flexibility for user input) 21 ##### fuction to clean and concatenate pathway name
20 concat_string <- function(x){ 22 ##### (allow more flexibility for user input)
21 x <- gsub(" - .*","",x) 23 concat_string <- function(x) {
22 x <- gsub(" ","",x) 24 x <- gsub(" - .*", "", x)
23 x <- gsub("-","",x) 25 x <- gsub(" ", "", x)
24 x <- gsub("_","",x) 26 x <- gsub("-", "", x)
25 x <- gsub(",","",x) 27 x <- gsub("_", "", x)
26 x <- gsub("\\'","",x) 28 x <- gsub(",", "", x)
27 x <- gsub("\\(.*)","",x) 29 x <- gsub("\\'", "", x)
28 x <- gsub("\\/","",x) 30 x <- gsub("\\(.*)", "", x)
31 x <- gsub("\\/", "", x)
29 x <- tolower(x) 32 x <- tolower(x)
30 return(x) 33 return(x)
31 } 34 }
32 35
33 #return output suffix (pathway name) from id kegg (ex : hsa:00010) 36 #return output suffix (pathway name) from id kegg (ex : hsa:00010)
34 get_suffix <- function(pathways_list,species,id){ 37 get_suffix <- function(pathways_list, species, id) {
35 suffix = gsub("/","or",pathways_list[pathways_list[,1]==paste(species,id,sep=""),2]) 38 suffix <- gsub("/", "or", pathways_list[pathways_list[, 1] == paste(
36 suffix = gsub(" ","_",suffix) 39 species, id, sep = ""), 2])
37 if (nchar(suffix) > 50){ 40 suffix <- gsub(" ", "_", suffix)
38 suffix = substr(suffix,1,50) 41 if (nchar(suffix) > 50) {
42 suffix <- substr(suffix, 1, 50)
39 } 43 }
40 return(suffix) 44 return(suffix)
41 } 45 }
42 46
43 str2bool <- function(x){ 47 str2bool <- function(x) {
44 if (any(is.element(c("t","true"),tolower(x)))){ 48 if (any(is.element(c("t", "true"), tolower(x)))) {
45 return (TRUE) 49 return(TRUE)
46 }else if (any(is.element(c("f","false"),tolower(x)))){ 50 }else if (any(is.element(c("f", "false"), tolower(x)))) {
47 return (FALSE) 51 return(FALSE)
48 }else{ 52 }else {
49 return(NULL) 53 return(NULL)
50 } 54 }
51 } 55 }
52 56
53 is.letter <- function(x) grepl("[[:alpha:]]", x) 57 is_letter <- function(x) grepl("[[:alpha:]]", x)
54 58
55 #### hsa00010 -> 00010 59 remove_kegg_prefix <- function(x) {
56 remove_kegg_prefix <- function(x){ 60 x <- gsub(":", "", x)
57 x = gsub(":","",x) 61 if (substr(x, 1, 4) == "path") {
58 if (substr(x,1,4) == 'path'){ 62 x <- substr(x, 5, nchar(x))
59 x=substr(x,5,nchar(x)) 63 }
60 } 64 if (is_letter(substr(x, 1, 3))) {
61 if (is.letter(substr(x,1,3))){ 65 x <- substr(x, 4, nchar(x))
62 x <- substr(x,4,nchar(x))
63 } 66 }
64 return(x) 67 return(x)
65 } 68 }
66 69
67 kegg_to_geneID <- function(vector){ 70 kegg_to_geneid <- function(vector) {
68 vector <- sapply(vector, function(x) unlist(strsplit(x,":"))[2],USE.NAMES = F) 71 #nolint start
69 return (vector) 72 vector <- sapply(vector, function(x) unlist(
73 strsplit(x, ":"))[2], USE.NAMES = F)
74 #nolint end
75 return(vector)
70 } 76 }
71 77
72 clean_bad_character <- function(string) { 78 clean_bad_character <- function(string) {
73 string <- gsub("X","",string) 79 string <- gsub("X", "", string)
74 return(string) 80 return(string)
75 } 81 }
76 82
77 get_list_from_cp <-function(list){ 83 get_list_from_cp <- function(list) {
78 list = gsub(";","\t",list) 84 list <- gsub(";", "\t", list)
79 list = gsub(",","\t",list) 85 list <- gsub(",", "\t", list)
80 list = strsplit(list, "[ \t\n]+")[[1]] 86 list <- strsplit(list, "[ \t\n]+")[[1]]
81 list = list[list != ""] #remove empty entry 87 list <- list[list != ""] #remove empty entry
82 list = gsub("-.+", "", list) #Remove isoform accession number (e.g. "-2") 88 list <- gsub("-.+", "", list) #Remove isoform accession number (e.g. "-2")
83 return(list) 89 return(list)
84 } 90 }
85 91
86 get_ref_pathways <- function(species){ 92 get_ref_pathways <- function(species) {
87 ##all available pathways for the species 93 ##all available pathways for the species
88 pathways <- keggLink("pathway", species) 94 pathways <- keggLink("pathway", species)
89 tot_path <- unique(pathways) 95 tot_path <- unique(pathways)
90 96
91 ##formating the dat into a list object 97 ##formating the dat into a list object
92 ##key= pathway ID, value = genes of the pathway in the kegg format 98 ##key= pathway ID, value = genes of the pathway in the kegg format
93 pathways_list <- sapply(tot_path, function(pathway) names(which(pathways==pathway))) 99 pathways_list <- sapply(tot_path, function(pathway)
94 return (pathways_list) 100 names(which(pathways == pathway)))
95 } 101 return(pathways_list)
96 102 }
97 mapping_summary <- function(pv.out,species,id,id_type,pathways_list,geneID,uniprotID,mapped2geneID){ 103
98 ref_pathways = get_ref_pathways(species) 104 #nolint start
99 names(ref_pathways) <- sapply(names(ref_pathways), function(x) gsub("path:[a-z]{3}","",x),USE.NAMES = F) 105 mapping_summary <- function(pv_out, species, id, id_type, pathways_list,
100 106 geneid, uniprotid, mapped2geneid) {
107 ref_pathways <- get_ref_pathways(species)
108 names(ref_pathways) <- sapply(names(ref_pathways), function(x)
109 gsub("path:[a-z]{3}", "", x), USE.NAMES = F)
110 #nolint end
101 #genes present in pathway 111 #genes present in pathway
102 genes = ref_pathways[id][[1]] 112 genes <- ref_pathways[id][[1]]
103 nb_genes = length(genes) 113 nb_genes <- length(genes)
104 114
105 #genes mapped on pathway genes 115 #genes mapped on pathway genes
106 mapped <- unlist(sapply(pv.out$plot.data.gene$all.mapped, function(x) strsplit(x,",")),use.names = F) 116 mapped <- unlist(sapply(pv_out$plot.data.gene$all.mapped,
107 mapped = unique(mapped[mapped!=""]) 117 function(x) strsplit(x, ",")), use.names = F)
118 mapped <- unique(mapped[mapped != ""])
108 nb_mapped <- length(mapped) 119 nb_mapped <- length(mapped)
109 120
110 #compue ratio of mapping 121 #compute ratio of mapping
111 ratio = round((nb_mapped/nb_genes)*100, 2) 122 ratio <- round((nb_mapped / nb_genes) * 100, 2)
112 if (is.nan(ratio)) { ratio = ""} 123 if (is.nan(ratio)) {
113 pathway_id = paste(species,id,sep="") 124 ratio <- ""
114 pathway_name = as.character(pathways_list[pathways_list[,1]==pathway_id,][2]) 125 }
115 126 pathway_id <- paste(species, id, sep = "")
116 if (id_type=="geneid" || id_type=="keggid") { 127 pathway_name <- as.character(
117 row <- c(pathway_id,pathway_name,length(unique(geneID)),nb_mapped,nb_genes,ratio,paste(mapped,collapse=";")) 128 pathways_list[pathways_list[, 1] == pathway_id, ][2])
118 names(row) <- c("KEGG pathway ID","pathway name","nb of Entrez gene ID used","nb of Entrez gene ID mapped", 129
119 "nb of Entrez gene ID in the pathway", "ratio of Entrez gene ID mapped (%)","Entrez gene ID mapped") 130 if (id_type == "geneid" || id_type == "keggid") {
120 } else if (id_type=="uniprotid") { 131 row <- c(pathway_id, pathway_name, length(unique(geneid)),
121 row <- c(pathway_id,pathway_name,length(unique(uniprotID)),length(unique(geneID)),nb_mapped,nb_genes,ratio,paste(mapped,collapse=";"),paste(mapped2geneID[which(mapped2geneID[,2] %in% mapped)],collapse=";")) 132 nb_mapped, nb_genes, ratio, paste(mapped, collapse = ";"))
122 names(row) <- c("KEGG pathway ID","pathway name","nb of Uniprot_AC used","nb of Entrez gene ID used","nb of Entrez gene ID mapped", 133 names(row) <- c("KEGG pathway ID", "pathway name",
123 "nb of Entrez gene ID in the pathway", "ratio of Entrez gene ID mapped (%)","Entrez gene ID mapped","uniprot_AC mapped") 134 "nb of Entrez gene ID used", "nb of Entrez gene ID mapped",
124 } 135 "nb of Entrez gene ID in the pathway",
136 "ratio of Entrez gene ID mapped (%)",
137 "Entrez gene ID mapped")
138 } else if (id_type == "uniprotid") {
139 row <- c(pathway_id, pathway_name, length(unique(uniprotid)),
140 length(unique(geneid)), nb_mapped, nb_genes, ratio,
141 paste(mapped, collapse = ";"),
142 paste(mapped2geneid[which(mapped2geneid[, 2] %in% mapped)],
143 collapse = ";"))
144 names(row) <- c("KEGG pathway ID", "pathway name", "nb of Uniprot_AC used",
145 "nb of Entrez gene ID used", "nb of Entrez gene ID mapped",
146 "nb of Entrez gene ID in the pathway",
147 "ratio of Entrez gene ID mapped (%)",
148 "Entrez gene ID mapped", "uniprot_AC mapped")
149 }
125 return(row) 150 return(row)
126 } 151 }
127 152
128 #take data frame, return data frame 153 #take data frame, return data frame
129 split_ids_per_line <- function(line,ncol){ 154 split_ids_per_line <- function(line, ncol) {
130 155
131 #print (line) 156 #print (line)
132 header = colnames(line) 157 header <- colnames(line)
133 line[ncol] = gsub("[[:blank:]]|\u00A0","",line[ncol]) 158 line[ncol] <- gsub("[[:blank:]]|\u00A0", "", line[ncol])
134 159
135 if (length(unlist(strsplit(as.character(line[ncol]),";")))>1) { 160 if (length(unlist(strsplit(as.character(line[ncol]), ";"))) > 1) {
136 if (length(line)==1 ) { 161 if (length(line) == 1) {
137 lines = as.data.frame(unlist(strsplit(as.character(line[ncol]),";")),stringsAsFactors = F) 162 lines <- as.data.frame(unlist(strsplit(as.character(line[ncol]), ";")),
163 stringsAsFactors = F)
138 } else { 164 } else {
139 if (ncol==1) { #first column 165 if (ncol == 1) { #first column
140 lines = suppressWarnings(cbind(unlist(strsplit(as.character(line[ncol]),";")), line[2:length(line)])) 166 lines <- suppressWarnings(cbind(unlist(strsplit(
141 } else if (ncol==length(line)) { #last column 167 as.character(line[ncol]), ";")), line[2:length(line)]))
142 lines = suppressWarnings(cbind(line[1:ncol-1],unlist(strsplit(as.character(line[ncol]),";")))) 168 } else if (ncol == length(line)) { #last column
169 lines <- suppressWarnings(cbind(line[1:ncol - 1],
170 unlist(strsplit(as.character(line[ncol]), ";"))))
143 } else { 171 } else {
144 lines = suppressWarnings(cbind(line[1:ncol-1], unlist(strsplit(as.character(line[ncol]),";"),use.names = F), line[(ncol+1):length(line)])) 172 lines <- suppressWarnings(cbind(line[1:ncol - 1],
173 unlist(strsplit(as.character(line[ncol]), ";"), use.names = F),
174 line[(ncol + 1):length(line)]))
145 } 175 }
146 } 176 }
147 colnames(lines)=header 177 colnames(lines) <- header
148 return(lines) 178 return(lines)
149 } else { 179 } else {
150 return(line) 180 return(line)
151 } 181 }
152 } 182 }
153 183
154 #create new lines if there's more than one id per cell in the columns in order to have only one id per line 184 #create new lines if there's more than one id per cell in the columns in order
155 one_id_one_line <-function(tab,ncol){ 185 # to have only one id per line
156 186 one_id_one_line <- function(tab, ncol) {
157 if (ncol(tab)>1){ 187
158 188 if (ncol(tab) > 1) {
159 tab[,ncol] = sapply(tab[,ncol],function(x) gsub("[[:blank:]]","",x)) 189
160 header=colnames(tab) 190 tab[, ncol] <- sapply(tab[, ncol], function(x) gsub("[[:blank:]]", "", x))
161 res=as.data.frame(matrix(ncol=ncol(tab),nrow=0)) 191 header <- colnames(tab)
162 for (i in 1:nrow(tab) ) { 192 res <- as.data.frame(matrix(ncol = ncol(tab), nrow = 0))
163 lines = split_ids_per_line(tab[i,],ncol) 193 for (i in seq_len(nrow(tab))) {
164 res = rbind(res,lines) 194 lines <- split_ids_per_line(tab[i, ], ncol)
165 } 195 res <- rbind(res, lines)
166 }else { 196 }
167 res = unlist(sapply(tab[,1],function(x) strsplit(x,";")),use.names = F) 197 } else {
168 res = data.frame(res[which(!is.na(res[res!=""]))],stringsAsFactors = F) 198 res <- unlist(sapply(tab[, 1], function(x) strsplit(x, ";")), use.names = F)
169 colnames(res)=colnames(tab) 199 res <- data.frame(res[which(!is.na(res[res != ""]))], stringsAsFactors = F)
200 colnames(res) <- colnames(tab)
170 } 201 }
171 return(res) 202 return(res)
172 } 203 }
173 204
174 get_limit <- function(mat) { 205 get_limit <- function(mat) {
175 min = min(apply(mat,2,min,na.rm=TRUE)) 206 min <- min(apply(mat, 2, min, na.rm = TRUE))
176 max = max(apply(mat,2,max,na.rm=TRUE)) 207 max <- max(apply(mat, 2, max, na.rm = TRUE))
177 return(c(min,max)) 208 return(c(min, max))
178 } 209 }
179 210
180 check_pathway_ids<- function(pathways_ids) { 211 check_pathway_ids <- function(pathways_ids) {
181 problematic_pathways <- c("04215","04723") 212 problematic_pathways <- c("04215", "04723")
182 to_remove <- intersect(pathways_ids,problematic_pathways) 213 to_remove <- intersect(pathways_ids, problematic_pathways)
183 if (length(to_remove) > 0){ print(paste("Pathway(s)",to_remove,"have been remove from input")) } 214 if (length(to_remove) > 0) {
215 print(paste("Pathway(s)", to_remove, "have been remove from input"))
216 }
184 pathways_ids <- pathways_ids[which(!pathways_ids %in% problematic_pathways)] 217 pathways_ids <- pathways_ids[which(!pathways_ids %in% problematic_pathways)]
185 if (length(pathways_ids) == 0){stop("No pathways ids to process")} 218 if (length(pathways_ids) == 0) {
186 return (pathways_ids) 219 stop("No pathways ids to process")
187 } 220 }
188 221 return(pathways_ids)
189 get_args <- function(){ 222 }
190 223
224 get_args <- function() {
225
191 ## Collect arguments 226 ## Collect arguments
192 args <- commandArgs(TRUE) 227 args <- commandArgs(TRUE)
193 228
194 ## Default setting when no arguments passed 229 ## Default setting when no arguments passed
195 if(length(args) < 1) { 230 if (length(args) < 1) {
196 args <- c("--help") 231 args <- c("--help")
197 } 232 }
198 233
199 ## Help section 234 ## Help section
200 if("--help" %in% args) { 235 if ("--help" %in% args) {
201 cat("Pathview R script 236 cat("Pathview R script
202 Arguments: 237 Arguments:
203 --help Print this test 238 --help Print this test
204 --input path of the input file (must contains a colum of uniprot and/or geneID accession number) 239 --input path of the input file (must contains a colum of
240 uniprot and/or geneid accession number)
205 --id_list list of ids to use, ',' separated 241 --id_list list of ids to use, ',' separated
206 --pathways_id Id(s) of pathway(s) to use, if several, semicolon separated list : hsa00010;hsa05412 242 --pathways_id Id(s) of pathway(s) to use, if several, semicolon
207 --id_type Type of accession number ('uniprotID' or 'geneID') 243 separated list : hsa00010;hsa05412
208 --id_column Column containing accesion number of interest (ex : 'c1') 244 --id_type Type of accession number ('uniprotid' or 'geneid')
245 --id_column Column containing accesion number of interest
246 (ex : 'c1')
209 --header Boolean, TRUE if header FALSE if not 247 --header Boolean, TRUE if header FALSE if not
210 --output Output filename 248 --output Output filename
211 --fold_change_col Column(s) containing fold change values (comma separated) 249 --fold_change_col Column(s) containing fold change values
250 (comma separated)
212 --native_kegg TRUE : native KEGG graph, FALSE : Graphviz graph 251 --native_kegg TRUE : native KEGG graph, FALSE : Graphviz graph
213 --species KEGG species (hsa, mmu, ...) 252 --species KEGG species (hsa, mmu, ...)
214 --pathways_input Tab with pathways in a column, output format of find_pathways 253 --pathways_input Tab with pathways in a column, output format of
254 find_pathways
215 --pathway_col Column of pathways to use 255 --pathway_col Column of pathways to use
216 --header2 Boolean, TRUE if header FALSE if not 256 --header2 Boolean, TRUE if header FALSE if not
217 --pathways_list path of file containg the species pathways list (hsa_pathways.loc, mmu_pathways.loc, ...) 257 --pathways_list path of file containg the species pathways list
258 (hsa_pathways.loc, mmu_pathways.loc, ...)
218 259
219 Example: 260 Example:
220 ./PathView.R --input 'input.csv' --pathway_id '05412' --id_type 'uniprotID' --id_column 'c1' --header TRUE \n\n") 261 ./PathView.R --input 'input.csv' --pathway_id '05412'
221 262 --id_type 'uniprotid' --id_column 'c1' --header TRUE \n\n")
222 q(save="no") 263
223 } 264 q(save = "no")
224 265 }
225 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") 266
226 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) 267 parseargs <- function(x) strsplit(sub("^--", "", x), "=")
227 args <- as.list(as.character(argsDF$V2)) 268 argsdf <- as.data.frame(do.call("rbind", parseargs(args)))
228 names(args) <- argsDF$V1 269 args <- as.list(as.character(argsdf$V2))
229 270 names(args) <- argsdf$V1
271
230 return(args) 272 return(args)
231 } 273 }
232 274
233 main <- function(){ 275 main <- function() {
234 276
235 args <- get_args() 277 args <- get_args()
236 278
237 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/kegg_maps_visualization/args.Rda") 279 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/
238 #load("/home/dchristiany/proteore_project/ProteoRE/tools/kegg_maps_visualization/args.Rda") 280 #kegg_maps_visualization/args.Rda")
239 281 #load("/home/dchristiany/proteore_project/ProteoRE/tools/
282 #kegg_maps_visualization/args.Rda")
283
240 ###setting variables 284 ###setting variables
241 if (!is.null(args$pathways_id)) { 285 if (!is.null(args$pathways_id)) {
242 ids <- get_list_from_cp(clean_bad_character(args$pathways_id)) 286 ids <- get_list_from_cp(clean_bad_character(args$pathways_id))
243 ids <- sapply(ids, function(x) remove_kegg_prefix(x),USE.NAMES = FALSE) 287 ids <- sapply(ids, function(x) remove_kegg_prefix(x), USE.NAMES = FALSE)
244 }else if (!is.null(args$pathways_input)){ 288 }else if (!is.null(args$pathways_input)) {
245 header2 <- str2bool(args$header2) 289 header2 <- str2bool(args$header2)
246 pathway_col <- as.numeric(gsub("c", "" ,args$pathway_col)) 290 pathway_col <- as.numeric(gsub("c", "", args$pathway_col))
247 pathways_file = read_file(args$pathways_input,header2) 291 pathways_file <- read_file(args$pathways_input, header2)
248 ids <- sapply(rapply(strsplit(clean_bad_character(pathways_file[,pathway_col]),","),c), function(x) remove_kegg_prefix(x),USE.NAMES = FALSE) 292 ids <- sapply(rapply(strsplit(clean_bad_character(
249 } 293 pathways_file[, pathway_col]), ","), c),
250 if (args$native_kegg) {ids = check_pathway_ids(ids)} #remove problematic pathways 294 function(x) remove_kegg_prefix(x), USE.NAMES = FALSE)
251 pathways_list <- read_file(args$pathways_list,F) 295 }
296 if (args$native_kegg) {
297 ids <- check_pathway_ids(ids)
298 } #remove problematic pathways
299 pathways_list <- read_file(args$pathways_list, F)
252 if (!is.null(args$id_list)) { 300 if (!is.null(args$id_list)) {
253 id_list <- get_list_from_cp(args$id_list) 301 id_list <- get_list_from_cp(args$id_list)
254 } 302 }
255 id_type <- tolower(args$id_type) 303 id_type <- tolower(args$id_type)
256 ncol <- as.numeric(gsub("c", "" ,args$id_column)) 304 ncol <- as.numeric(gsub("c", "", args$id_column))
257 header <- str2bool(args$header) 305 header <- str2bool(args$header)
258 native_kegg <- str2bool(args$native_kegg) 306 native_kegg <- str2bool(args$native_kegg)
259 species=args$species 307 species <- args$species
260 fold_change_data = str2bool(args$fold_change_data) 308 fold_change_data <- str2bool(args$fold_change_data)
261 309
262 #org list used in mapped2geneID 310 #org list used in mapped2geneid
263 org <- c('Hs','Mm','Rn') 311 org <- c("Hs", "Mm", "Rn")
264 names(org) <- c('hsa','mmu','rno') 312 names(org) <- c("hsa", "mmu", "rno")
265 313
266 #read input file or list 314 #read input file or list
267 if (!is.null(args$input)){ 315 if (!is.null(args$input)) {
268 tab <- read_file(args$input,header) 316 tab <- read_file(args$input, header)
269 tab <- data.frame(tab[which(tab[ncol]!=""),],stringsAsFactors = F) 317 tab <- data.frame(tab[which(tab[ncol] != ""), ], stringsAsFactors = F)
270 tab = one_id_one_line(tab,ncol) 318 tab <- one_id_one_line(tab, ncol)
271 } else { 319 } else {
272 id_list = gsub("[[:blank:]]|\u00A0|NA","",id_list) 320 id_list <- gsub("[[:blank:]]|\u00A0|NA", "", id_list)
273 id_list = unique(id_list[id_list!=""]) 321 id_list <- unique(id_list[id_list != ""])
274 tab <- data.frame(id_list,stringsAsFactors = F) 322 tab <- data.frame(id_list, stringsAsFactors = F)
275 ncol=1 323 ncol <- 1
276 } 324 }
277 325
278 326
279 ##### map uniprotID to entrez geneID and kegg to geneID 327 ##### map uniprotid to entrez geneid and kegg to geneid
280 uniprotID="" 328 uniprotid <- ""
281 mapped2geneID="" 329 mapped2geneid <- ""
282 if (id_type == "uniprotid") { 330 if (id_type == "uniprotid") {
283 uniprotID=tab[,ncol] 331 uniprotid <- tab[, ncol]
284 mapped2geneID = id2eg(ids = uniprotID, category = "uniprot", org = org[[species]], pkg.name = NULL) 332 mapped2geneid <- id2eg(ids = uniprotid, category = "uniprot",
285 geneID = mapped2geneID[,2] 333 org = org[[species]], pkg.name = NULL)
286 tab = cbind(tab,geneID) 334 geneid <- mapped2geneid[, 2]
287 ncol=ncol(tab) 335 tab <- cbind(tab, geneid)
288 }else if (id_type == "keggid"){ 336 ncol <- ncol(tab)
289 keggID = tab[,ncol] 337 }else if (id_type == "keggid") {
290 geneID = kegg_to_geneID(keggID) 338 keggid <- tab[, ncol]
291 tab = cbind(tab,geneID) 339 geneid <- kegg_to_geneid(keggid)
292 ncol=ncol(tab) 340 tab <- cbind(tab, geneid)
293 }else if (id_type == "geneid"){ 341 ncol <- ncol(tab)
294 colnames(tab)[ncol] <- "geneID" 342 }else if (id_type == "geneid") {
295 } 343 colnames(tab)[ncol] <- "geneid"
296 344 }
345
297 ##### build matrix to map on KEGG pathway (kgml : KEGG xml) 346 ##### build matrix to map on KEGG pathway (kgml : KEGG xml)
298 geneID_indices = which(!is.na(tab$geneID)) 347 geneid_indices <- which(!is.na(tab$geneid))
299 if (fold_change_data) { 348 if (fold_change_data) {
300 fold_change <- as.integer(unlist(strsplit(gsub("c","",args$fold_change_col),","))) 349 fold_change <- as.integer(unlist(strsplit(gsub("c", "",
301 if (length(fold_change) > 3) { fold_change= fold_change[1:3] } 350 args$fold_change_col), ",")))
302 if (length(fold_change)==1){ 351 if (length(fold_change) > 3) {
303 tab[,fold_change] <- as.double(gsub(",",".",as.character(tab[,fold_change]) )) 352 fold_change <- fold_change[1:3]
353 }
354 if (length(fold_change) == 1) {
355 tab[, fold_change] <- as.double(gsub(
356 ",", ".", as.character(tab[, fold_change])))
304 } else { 357 } else {
305 tab[,fold_change] <- apply(tab[,fold_change],2,function(x) as.double(gsub(",",".",as.character(x)))) 358 tab[, fold_change] <- apply(tab[, fold_change], 2,
306 } 359 function(x) as.double(gsub(",", ".", as.character(x))))
307 mat = tab[geneID_indices,c(ncol,fold_change)] 360 }
308 mat = mat[(!duplicated(mat$geneID)),] 361 mat <- tab[geneid_indices, c(ncol, fold_change)]
309 geneID=mat$geneID 362 mat <- mat[(!duplicated(mat$geneid)), ]
310 mat = as.data.frame(mat[,-1]) 363 geneid <- mat$geneid
311 row.names(mat)=geneID 364 mat <- as.data.frame(mat[, -1])
312 limit = get_limit(mat) 365 row.names(mat) <- geneid
366 limit <- get_limit(mat)
313 } else { 367 } else {
314 mat = unique(as.character(tab$geneID[!is.na(tab$geneID[tab$geneID!=""])])) 368 mat <- unique(as.character(
315 geneID=mat 369 tab$geneid[!is.na(tab$geneid[tab$geneid != ""])]))
316 limit=1 370 geneid <- mat
317 } 371 limit <- 1
318 372 }
319 #####mapping geneID (with or without expression values) on KEGG pathway 373
320 plot.col.key= TRUE 374 #####mapping geneid (with or without expression values) on KEGG pathway
321 low_color = "green" 375 plot_col_key <- TRUE
322 mid_color = "#F3F781" #yellow 376 low_color <- "green"
323 high_color = "red" 377 mid_color <- "#F3F781" #yellow
378 high_color <- "red"
324 if (!fold_change_data) { 379 if (!fold_change_data) {
325 plot.col.key= FALSE #if there's no exrepession data, we don't show the color key 380 plot_col_key <- FALSE
326 high_color = "#81BEF7" #blue 381 #if there's no exrepession data, we don't show the color key
327 } 382 high_color <- "#81BEF7" #blue
328 383 }
384
329 #create graph(s) and text output 385 #create graph(s) and text output
330 for (id in ids) { 386 for (id in ids) {
331 suffix= get_suffix(pathways_list,species,id) 387 suffix <- get_suffix(pathways_list, species, id)
332 pv.out <- suppressMessages(pathview(gene.data = mat, 388 pv_out <- suppressMessages(pathview(gene.data = mat,
333 gene.idtype = "entrez", 389 gene.idtype = "entrez",
334 pathway.id = id, 390 pathway.id = id,
335 species = species, 391 species = species,
336 kegg.dir = ".", 392 kegg.dir = ".",
337 out.suffix=suffix, 393 out.suffix = suffix,
338 kegg.native = native_kegg, 394 kegg.native = native_kegg,
339 low = list(gene = low_color, cpd = "blue"), 395 low = list(gene = low_color, cpd = "blue"),
340 mid = list(gene = mid_color, cpd = "transparent"), 396 mid = list(gene = mid_color, cpd = "transparent"),
341 high = list(gene = high_color, cpd = "yellow"), 397 high = list(gene = high_color, cpd = "yellow"),
342 na.col="#D8D8D8", #gray 398 na.col = "#D8D8D8", #gray
343 cpd.data=NULL, 399 cpd.data = NULL,
344 plot.col.key = plot.col.key, 400 plot_col_key = plot_col_key,
345 pdf.size=c(9,9), 401 pdf.size = c(9, 9),
346 limit=list(gene=limit, cpd=limit))) 402 limit = list(gene = limit, cpd = limit)))
347 403
348 if (is.list(pv.out)){ 404 if (is.list(pv_out)) {
349 405
350 #creating text file 406 #creating text file
351 if (!exists("DF")) { 407 if (!exists("df")) {
352 DF <- data.frame(t(mapping_summary(pv.out,species,id,id_type,pathways_list,geneID,uniprotID,mapped2geneID)),stringsAsFactors = F,check.names = F) 408 df <- data.frame(t(mapping_summary(pv_out, species, id, id_type,
409 pathways_list, geneid, uniprotid, mapped2geneid)),
410 stringsAsFactors = F, check.names = F)
353 } else { 411 } else {
354 #print (mapping_summary(pv.out,species,id)) 412 df <- rbind(df, data.frame(t(mapping_summary(pv_out, species, id,
355 DF <- rbind(DF,data.frame(t(mapping_summary(pv.out,species,id,id_type,pathways_list,geneID,uniprotID,mapped2geneID)),stringsAsFactors = F,check.names = F)) 413 id_type, pathways_list, geneid, uniprotid, mapped2geneid)),
414 stringsAsFactors = F, check.names = F))
356 } 415 }
357 } 416 }
358 } 417 }
359 418
360 DF <- as.data.frame(apply(DF, c(1,2), function(x) gsub("^$|^ $", NA, x))) #convert "" et " " to NA 419 df <- as.data.frame(apply(df, c(1, 2),
361 420 function(x) gsub("^$|^ $", NA, x))) #convert "" et " " to NA
421
362 #text file output 422 #text file output
363 write.table(DF,file=args$output,quote=FALSE, sep='\t',row.names = FALSE, col.names = TRUE) 423 write.table(df, file = args$output, quote = FALSE, sep = "\t",
424 row.names = FALSE, col.names = TRUE)
364 } 425 }
365 426
366 main() 427 main()