annotate kegg_maps_visualization.R @ 9:9ac0ade20b3f draft

"planemo upload commit ba867b8fa3352695fbda1ae764407f363ee79a50-dirty"
author proteore
date Wed, 08 Jan 2020 09:03:41 +0000
parents ab697ebe0f41
children a4db9efe1e43
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
1 #!/usr/bin/Rscript
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
2 #Rscript made for mapping genesID on KEGG pathway with Pathview package
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
3 #input : csv file containing ids (uniprot or geneID) to map, plus parameters
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
4 #output : KEGG pathway : jpeg or pdf file.
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
5
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
6 options(warn=-1) #TURN OFF WARNINGS !!!!!!
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
7 suppressMessages(library("pathview"))
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
8 suppressMessages(library(KEGGREST))
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
9
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
# Read a tab-separated file into a data frame.
#
# path   : path of the file to read.
# header : logical, TRUE when the first line of the file holds column names.
#
# Columns are kept as character vectors (no factors), column names are left
# untouched (check.names = FALSE) and '#' is not treated as a comment marker.
# Stops with an explicit message, followed by the underlying read error, when
# the file cannot be read.
read_file <- function(path, header) {
  tryCatch(
    read.csv(path, header = header, sep = "\t", stringsAsFactors = FALSE,
             quote = "\"", check.names = FALSE, comment.char = ""),
    error = function(e) {
      # Keep the historical message and append the real cause so the user
      # can tell what actually went wrong.
      stop("Read file error ! Please check your file (header, # character, etc) ",
           "[", conditionMessage(e), "]", call. = FALSE)
    }
  )
}
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
18
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
19 ##### function to clean and concatenate pathway name (allow more flexibility for user input)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
# Normalise a pathway name into a compact lower-case key so that user input
# and reference names can be compared loosely.
#
# x : character vector of pathway names.
#
# Returns x with every match of the patterns below deleted, then lower-cased.
concat_string <- function(x) {
  # Applied strictly in this order; every match is replaced by "".
  removal_patterns <- c(
    " - .*",   # everything from the first " - " onwards
    " ",
    "-",
    "_",
    ",",
    "\\'",
    "\\(.*)",  # from the first "(" to the last ")"
    "\\/"
  )
  cleaned <- x
  for (pattern in removal_patterns) {
    cleaned <- gsub(pattern, "", cleaned)
  }
  tolower(cleaned)
}
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
32
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
33 #return output suffix (pathway name) from id kegg (ex : hsa:00010)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
# Build the output-file suffix (derived from the pathway name) for a pathway.
#
# pathways_list : table whose first column holds full pathway ids
#                 (species code + number) and second column the pathway names.
# species       : species code, e.g. "hsa".
# id            : pathway number without the species code, e.g. "00010".
#
# Returns the pathway name with "/" replaced by "or", spaces replaced by "_"
# and truncated to 50 characters. Stops with an explicit message when the
# pathway id is not present in pathways_list.
get_suffix <- function(pathways_list, species, id) {
  pathway_id <- paste0(species, id)
  pathway_name <- pathways_list[pathways_list[, 1] == pathway_id, 2]
  if (length(pathway_name) == 0) {
    stop("Pathway '", pathway_id, "' not found in the pathways list",
         call. = FALSE)
  }
  suffix <- gsub("/", "or", pathway_name)
  suffix <- gsub(" ", "_", suffix)
  # substr() only shortens strings longer than 50 characters.
  substr(suffix, 1, 50)
}
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
42
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
# Convert a textual boolean into a logical value.
#
# x : character value(s); matching is case-insensitive.
#
# Returns TRUE when any element is "t" or "true", otherwise FALSE when any
# element is "f" or "false", otherwise NULL.
str2bool <- function(x) {
  lowered <- tolower(x)
  if (any(c("t", "true") %in% lowered)) {
    return(TRUE)
  }
  if (any(c("f", "false") %in% lowered)) {
    return(FALSE)
  }
  NULL
}
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
52
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
# TRUE for each element of x that contains at least one alphabetic character.
is.letter <- function(x) {
  grepl(pattern = "[[:alpha:]]", x = x)
}
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
54
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
55 #### hsa00010 -> 00010
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
# Strip the KEGG prefix from a single pathway id, e.g. "path:hsa00010" or
# "hsa00010" -> "00010".
#
# x : one pathway id (scalar).
#
# Colons are removed first, then a leading "path", then the first three
# characters whenever they contain at least one letter.
remove_kegg_prefix <- function(x) {
  id <- gsub(":", "", x)
  if (startsWith(id, "path")) {
    id <- substr(id, 5, nchar(id))
  }
  starts_with_code <- grepl("[[:alpha:]]", substr(id, 1, 3))
  if (starts_with_code) {
    id <- substr(id, 4, nchar(id))
  }
  id
}
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
66
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
# Extract the gene id from KEGG-style identifiers ("hsa:1234" -> "1234").
#
# vector : character vector of "<prefix>:<gene id>" strings.
#
# Returns an unnamed character vector of the same length holding the second
# ":"-separated field of each element (NA when an element has no such field).
kegg_to_geneID <- function(vector) {
  # vapply guarantees a character vector, including for empty input.
  vapply(vector, function(x) unlist(strsplit(x, ":"))[2], character(1),
         USE.NAMES = FALSE)
}
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
71
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
# Delete every upper-case "X" from the given string(s).
clean_bad_character <- function(string) {
  gsub(pattern = "X", replacement = "", x = string)
}
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
76
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
# Turn a copy/pasted id list into a character vector of ids.
#
# list : a single string of ids separated by any mix of spaces, tabs,
#        newlines, ';' or ','.
#
# Returns the non-empty ids, each cut at its first "-" when at least one
# character follows it (removes isoform suffixes such as "-2").
get_list_from_cp <- function(list) {
  tokens <- strsplit(as.character(list), "[ \t\n;,]+")[[1]]
  tokens <- tokens[nzchar(tokens)]  # remove empty entries
  gsub("-.+", "", tokens)
}
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
85
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
# Fetch, for a species, the genes attached to each KEGG pathway.
#
# species : KEGG species code (e.g. "hsa").
#
# Returns a list keyed by pathway id; each element holds the names of the
# keggLink() entries that point to that pathway (the genes, in KEGG format).
# NOTE(review): relies on keggLink("pathway", species) returning a vector of
# pathway ids named by gene id -- confirm against the KEGGREST documentation.
get_ref_pathways <- function(species) {
  ## all available pathways for the species
  pathways <- keggLink("pathway", species)
  tot_path <- unique(pathways)

  ## key = pathway ID, value = genes of the pathway in the kegg format.
  ## lapply (rather than sapply) keeps the result a list even when every
  ## pathway happens to have the same number of genes.
  pathways_list <- lapply(tot_path,
                          function(pathway) names(which(pathways == pathway)))
  names(pathways_list) <- tot_path
  pathways_list
}
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
96
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
# Build one summary row describing how the input ids mapped onto a pathway.
#
# pv.out        : object whose $plot.data.gene$all.mapped holds comma-separated
#                 mapped gene ids (presumably the value returned by pathview()
#                 -- confirm against the caller).
# species       : KEGG species code (e.g. "hsa").
# id            : pathway number without the species code (e.g. "00010").
# id_type       : "geneid", "keggid" or "uniprotid".
# pathways_list : table with pathway ids in column 1 and names in column 2.
# geneID        : Entrez gene ids that were used.
# uniprotID     : UniProt accessions that were used (uniprotid mode only).
# mapped2geneID : UniProt <-> gene id correspondence whose second column holds
#                 gene ids (uniprotid mode only).
#
# Returns a named character vector (one summary-table row). Stops when id_type
# is not one of the supported values.
mapping_summary <- function(pv.out, species, id, id_type, pathways_list, geneID, uniprotID, mapped2geneID) {
  ref_pathways <- get_ref_pathways(species)
  names(ref_pathways) <- gsub("path:[a-z]{3}", "", names(ref_pathways))

  # genes present in pathway
  genes <- ref_pathways[id][[1]]
  nb_genes <- length(genes)

  # genes mapped on pathway genes
  mapped <- unlist(lapply(pv.out$plot.data.gene$all.mapped, strsplit, split = ","),
                   use.names = FALSE)
  mapped <- unique(mapped[mapped != ""])
  nb_mapped <- length(mapped)

  # compute ratio of mapping (0/0 gives NaN, reported as an empty cell)
  ratio <- round((nb_mapped / nb_genes) * 100, 2)
  if (is.nan(ratio)) {
    ratio <- ""
  }
  pathway_id <- paste0(species, id)
  pathway_name <- as.character(pathways_list[pathways_list[, 1] == pathway_id, ][2])

  if (id_type == "geneid" || id_type == "keggid") {
    row <- c(pathway_id, pathway_name, length(unique(geneID)), nb_mapped, nb_genes, ratio,
             paste(mapped, collapse = ";"))
    names(row) <- c("KEGG pathway ID", "pathway name", "nb of Entrez gene ID used", "nb of Entrez gene ID mapped",
                    "nb of Entrez gene ID in the pathway", "ratio of Entrez gene ID mapped (%)", "Entrez gene ID mapped")
  } else if (id_type == "uniprotid") {
    # NOTE(review): mapped2geneID is indexed with a single vector; on a matrix
    # this picks first-column entries of the matching rows, on a data frame it
    # would select columns instead -- confirm the type passed by the caller.
    row <- c(pathway_id, pathway_name, length(unique(uniprotID)), length(unique(geneID)), nb_mapped, nb_genes, ratio,
             paste(mapped, collapse = ";"),
             paste(mapped2geneID[which(mapped2geneID[, 2] %in% mapped)], collapse = ";"))
    names(row) <- c("KEGG pathway ID", "pathway name", "nb of Uniprot_AC used", "nb of Entrez gene ID used", "nb of Entrez gene ID mapped",
                    "nb of Entrez gene ID in the pathway", "ratio of Entrez gene ID mapped (%)", "Entrez gene ID mapped", "uniprot_AC mapped")
  } else {
    # Without this branch `row` would resolve to the base function row().
    stop("Unsupported id_type '", id_type,
         "' (expected 'geneid', 'keggid' or 'uniprotid')", call. = FALSE)
  }
  row
}
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
127
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
128 #take data frame, return data frame
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
# Expand one table row into several rows when its id cell holds several
# ';'-separated ids. Takes a data frame, returns a data frame.
#
# line : a one-row data frame.
# ncol : index of the column that holds the id(s).
#
# Blanks and non-breaking spaces are removed from the id cell first. When the
# cell holds a single id the (cleaned) row is returned as is; otherwise the
# result has one row per id, the other cells being repeated, and the same
# column names as `line`.
split_ids_per_line <- function(line, ncol) {
  header <- colnames(line)
  line[ncol] <- gsub("[[:blank:]]|\u00A0", "", line[ncol])

  ids <- unlist(strsplit(as.character(line[ncol]), ";"), use.names = FALSE)
  if (length(ids) <= 1) {
    return(line)
  }

  n_fields <- length(line)
  if (n_fields == 1) {
    lines <- as.data.frame(ids, stringsAsFactors = FALSE)
  } else if (ncol == 1) { # first column
    lines <- suppressWarnings(cbind(ids, line[2:n_fields]))
  } else if (ncol == n_fields) { # last column
    lines <- suppressWarnings(cbind(line[seq_len(ncol - 1)], ids))
  } else {
    lines <- suppressWarnings(cbind(line[seq_len(ncol - 1)], ids,
                                    line[(ncol + 1):n_fields]))
  }
  colnames(lines) <- header
  lines
}
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
153
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
154 #create new lines if there's more than one id per cell in the columns in order to have only one id per line
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
# Create new lines when a cell of the id column holds more than one id, so
# that the result has exactly one id per line.
#
# tab  : data frame to expand.
# ncol : index of the column that holds the ids.
#
# Returns a data frame. With several columns each row is expanded through
# split_ids_per_line(); with a single column the ids are split on ';' and
# empty / missing ids are dropped.
one_id_one_line <- function(tab, ncol) {
  if (ncol(tab) > 1) {
    tab[, ncol] <- gsub("[[:blank:]]", "", tab[, ncol])
    res <- as.data.frame(matrix(ncol = ncol(tab), nrow = 0))
    # seq_len() keeps the loop from running on a table without rows.
    for (i in seq_len(nrow(tab))) {
      res <- rbind(res, split_ids_per_line(tab[i, ], ncol))
    }
  } else {
    # as.character() lets purely numeric id columns be split as well.
    ids <- unlist(strsplit(as.character(tab[, 1]), ";"), use.names = FALSE)
    ids <- as.character(ids)
    # Filter on the vector itself so kept positions match the kept values.
    ids <- ids[!is.na(ids) & ids != ""]
    res <- data.frame(ids, stringsAsFactors = FALSE)
    colnames(res) <- colnames(tab)
  }
  res
}
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
173
2
8a6863adcd09 planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents: 0
diff changeset
# Overall value range of a matrix-like object.
#
# mat : object with columns (matrix or data frame).
#
# Returns c(lowest, highest): the smallest of the column minima and the
# largest of the column maxima.
get_limit <- function(mat) {
  column_minima <- apply(mat, 2, min)
  column_maxima <- apply(mat, 2, max)
  c(min(column_minima), max(column_maxima))
}
8a6863adcd09 planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents: 0
diff changeset
179
0
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
# Collect the command-line arguments of the script.
#
# Arguments are expected as "--name=value". Prints the help text and quits
# when no argument or "--help" is given.
#
# Returns a named list of character values (names = argument names without
# the leading "--"). Only the first "=" separates name from value, so values
# may themselves contain "=". An argument with nothing after its name keeps
# the historical behaviour of carrying its own name as value.
get_args <- function() {

  ## Collect arguments
  args <- commandArgs(TRUE)

  ## Default setting when no arguments passed
  if (length(args) < 1) {
    args <- c("--help")
  }

  ## Help section
  if ("--help" %in% args) {
    cat("Pathview R script
Arguments:
--help Print this test
--input path of the input file (must contains a colum of uniprot and/or geneID accession number)
--id_list list of ids to use, ',' separated
--pathways_id Id(s) of pathway(s) to use, if several, semicolon separated list : hsa00010;hsa05412
--id_type Type of accession number ('uniprotID' or 'geneID')
--id_column Column containing accesion number of interest (ex : 'c1')
--header Boolean, TRUE if header FALSE if not
--output Output filename
--fold_change_col Column(s) containing fold change values (comma separated)
--native_kegg TRUE : native KEGG graph, FALSE : Graphviz graph
--species KEGG species (hsa, mmu, ...)
--pathways_input Tab with pathways in a column, output format of find_pathways
--pathway_col Column of pathways to use
--header2 Boolean, TRUE if header FALSE if not
--pathways_list path of file containg the species pathways list (hsa_pathways.loc, mmu_pathways.loc, ...)

Example:
./PathView.R --input 'input.csv' --pathway_id '05412' --id_type 'uniprotID' --id_column 'c1' --header TRUE \n\n")

    q(save = "no")
  }

  ## Split every argument on its first "=" only
  stripped <- sub("^--", "", args)
  eq_pos <- regexpr("=", stripped, fixed = TRUE)
  has_eq <- eq_pos > 0
  keys <- ifelse(has_eq, substr(stripped, 1, eq_pos - 1), stripped)
  after <- ifelse(has_eq, substr(stripped, eq_pos + 1, nchar(stripped)), "")
  values <- ifelse(nzchar(after), after, keys)

  parsed <- as.list(values)
  names(parsed) <- keys
  parsed
}
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
223
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
224 main <- function(){
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
225
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
226 args <- get_args()
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
227
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
228 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/kegg_maps_visualization/args.Rda")
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
229 #load("/home/dchristiany/proteore_project/ProteoRE/tools/kegg_maps_visualization/args.Rda")
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
230
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
231 ###setting variables
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
232 if (!is.null(args$pathways_id)) {
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
233 ids <- get_list_from_cp(clean_bad_character(args$pathways_id))
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
234 ids <- sapply(ids, function(x) remove_kegg_prefix(x),USE.NAMES = FALSE)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
235 }else if (!is.null(args$pathways_input)){
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
236 header2 <- str2bool(args$header2)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
237 pathway_col <- as.numeric(gsub("c", "" ,args$pathway_col))
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
238 pathways_file = read_file(args$pathways_input,header2)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
239 ids <- sapply(rapply(strsplit(clean_bad_character(pathways_file[,pathway_col]),","),c), function(x) remove_kegg_prefix(x),USE.NAMES = FALSE)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
240 }
9
9ac0ade20b3f "planemo upload commit ba867b8fa3352695fbda1ae764407f363ee79a50-dirty"
proteore
parents: 8
diff changeset
241 if (str2bool(args$native_kegg)) { ids <- ids[ids != "04215"] } #parse the flag with str2bool, as done for native_kegg further down, instead of relying on implicit string-to-logical coercion
0
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
242 pathways_list <- read_file(args$pathways_list,F)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
243 if (!is.null(args$id_list)) {
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
244 id_list <- get_list_from_cp(args$id_list)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
245 }
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
246 id_type <- tolower(args$id_type)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
247 ncol <- as.numeric(gsub("c", "" ,args$id_column))
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
248 header <- str2bool(args$header)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
249 native_kegg <- str2bool(args$native_kegg)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
250 species=args$species
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
251 fold_change_data = str2bool(args$fold_change_data)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
252
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
253 #org list used in mapped2geneID
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
254 org <- c('Hs','Mm','Rn')
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
255 names(org) <- c('hsa','mmu','rno')
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
256
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
257 #read input file or list
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
258 if (!is.null(args$input)){
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
259 tab <- read_file(args$input,header)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
260 tab <- data.frame(tab[which(tab[ncol]!=""),],stringsAsFactors = F)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
261 tab = one_id_one_line(tab,ncol)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
262 } else {
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
263 id_list = gsub("[[:blank:]]|\u00A0","",id_list); id_list[id_list %in% "NA"] = "" #strip blanks / non-breaking spaces, then blank out entries that are exactly "NA" (an unanchored "NA" pattern would also cut "NA" out of the middle of real identifiers)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
264 id_list = unique(id_list[id_list!=""])
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
265 tab <- data.frame(id_list,stringsAsFactors = F)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
266 ncol=1
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
267 }
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
268
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
269
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
270 ##### map uniprotID to entrez geneID and kegg to geneID
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
271 uniprotID=""
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
272 mapped2geneID=""
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
273 if (id_type == "uniprotid") {
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
274 uniprotID=tab[,ncol]
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
275 mapped2geneID = id2eg(ids = uniprotID, category = "uniprot", org = org[[species]], pkg.name = NULL)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
276 geneID = mapped2geneID[,2]
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
277 tab = cbind(tab,geneID)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
278 ncol=ncol(tab)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
279 }else if (id_type == "keggid"){
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
280 keggID = tab[,ncol]
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
281 geneID = kegg_to_geneID(keggID)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
282 tab = cbind(tab,geneID)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
283 ncol=ncol(tab)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
284 }else if (id_type == "geneid"){
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
285 colnames(tab)[ncol] <- "geneID"
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
286 }
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
287
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
288 ##### build matrix to map on KEGG pathway (kgml : KEGG xml)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
289 geneID_indices = which(!is.na(tab$geneID))
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
290 if (fold_change_data) {
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
291 fold_change <- as.integer(unlist(strsplit(gsub("c","",args$fold_change_col),",")))
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
292 if (length(fold_change) > 3) { fold_change= fold_change[1:3] }
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
293 if (length(fold_change)==1){
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
294 tab[,fold_change] <- as.double(gsub(",",".",as.character(tab[,fold_change]) ))
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
295 } else {
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
296 tab[,fold_change] <- apply(tab[,fold_change],2,function(x) as.double(gsub(",",".",as.character(x))))
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
297 }
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
298 mat = tab[geneID_indices,c(ncol,fold_change)]
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
299 mat = mat[(!duplicated(mat$geneID)),]
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
300 geneID=mat$geneID
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
301 mat = as.data.frame(mat[,-1])
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
302 row.names(mat)=geneID
2
8a6863adcd09 planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents: 0
diff changeset
303 limit = get_limit(mat)
0
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
304 } else {
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
305 mat = unique(as.character(tab$geneID[!is.na(tab$geneID) & tab$geneID != ""])) #keep only non-NA, non-empty gene IDs; the mask is computed on the full column so it stays aligned with it
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
306 geneID=mat
2
8a6863adcd09 planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents: 0
diff changeset
307 limit=1
0
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
308 }
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
309
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
310 #####mapping geneID (with or without expression values) on KEGG pathway
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
311 plot.col.key= TRUE
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
312 low_color = "green"
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
313 mid_color = "#F3F781" #yellow
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
314 high_color = "red"
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
315 if (!fold_change_data) {
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
316 plot.col.key= FALSE #if there's no expression data, we don't show the color key
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
317 high_color = "#81BEF7" #blue
2
8a6863adcd09 planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents: 0
diff changeset
318 }
0
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
319
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
320 #create graph(s) and text output
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
321 for (id in ids) {
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
322 suffix= get_suffix(pathways_list,species,id)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
323 pv.out <- suppressMessages(pathview(gene.data = mat,
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
324 gene.idtype = "entrez",
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
325 pathway.id = id,
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
326 species = species,
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
327 kegg.dir = ".",
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
328 out.suffix=suffix,
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
329 kegg.native = native_kegg,
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
330 low = list(gene = low_color, cpd = "blue"),
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
331 mid = list(gene = mid_color, cpd = "transparent"),
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
332 high = list(gene = high_color, cpd = "yellow"),
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
333 na.col="#D8D8D8", #gray
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
334 cpd.data=NULL,
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
335 plot.col.key = plot.col.key,
2
8a6863adcd09 planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents: 0
diff changeset
336 pdf.size=c(9,9),
8a6863adcd09 planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents: 0
diff changeset
337 limit=list(gene=limit, cpd=limit)))
0
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
338
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
339 if (is.list(pv.out)){
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
340
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
341 #creating text file
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
342 if (!exists("DF")) {
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
343 DF <- data.frame(t(mapping_summary(pv.out,species,id,id_type,pathways_list,geneID,uniprotID,mapped2geneID)),stringsAsFactors = F,check.names = F)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
344 } else {
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
345 #print (mapping_summary(pv.out,species,id))
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
346 DF <- rbind(DF,data.frame(t(mapping_summary(pv.out,species,id,id_type,pathways_list,geneID,uniprotID,mapped2geneID)),stringsAsFactors = F,check.names = F))
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
347 }
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
348 }
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
349 }
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
350
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
351 DF <- as.data.frame(apply(DF, c(1,2), function(x) gsub("^$|^ $", NA, x))) #convert "" and " " to NA
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
352
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
353 #text file output
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
354 write.table(DF,file=args$output,quote=FALSE, sep='\t',row.names = FALSE, col.names = TRUE)
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
355 }
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
356
789acaab8255 planemo upload commit 78ad61e52c2bf8c5ffada89a8eed429a332eb40b-dirty
proteore
parents:
diff changeset
357 main()