comparison PathView.R @ 0:097bb3ed2b4d draft

planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
author proteore
date Wed, 22 Aug 2018 11:30:23 -0400
parents
children b617d4bbebf8
comparison
equal deleted inserted replaced
-1:000000000000 0:097bb3ed2b4d
1 #!/usr/bin/Rscript
2 #Rscript made for mapping genesID on KEGG pathway with Pathview package
3 #input : csv file containing ids (uniprot or geneID) to map, plus parameters
4 #output : KEGG pathway : jpeg or pdf file.
5
6 suppressMessages(library("pathview"))
7
8 read_file <- function(path,header){
9 file <- try(read.table(path,header=header, sep="\t",stringsAsFactors = FALSE, quote=""),silent=TRUE)
10 if (inherits(file,"try-error")){
11 stop("File not found !")
12 }else{
13 return(file)
14 }
15 }
16
17 ##### fuction to clean and concatenate pathway name (allow more flexibility for user input)
18 concat_string <- function(x){
19 x <- gsub(" - .*","",x)
20 x <- gsub(" ","",x)
21 x <- gsub("-","",x)
22 x <- gsub("_","",x)
23 x <- gsub(",","",x)
24 x <- gsub("\\'","",x)
25 x <- gsub("\\(.*)","",x)
26 x <- gsub("\\/","",x)
27 x <- tolower(x)
28 return(x)
29 }
30
31
32 get_args <- function(){
33
34 ## Collect arguments
35 args <- commandArgs(TRUE)
36
37 ## Default setting when no arguments passed
38 if(length(args) < 1) {
39 args <- c("--help")
40 }
41
42 ## Help section
43 if("--help" %in% args) {
44 cat("Pathview R script
45 Arguments:
46 --help Print this test
47 --input path of the input file (must contains a colum of uniprot and/or geneID accession number)
48 --id_list list of ids to use, ',' separated
49 --pathways_id Id(s) of pathway(s) to use, if several, semicolon separated list : hsa00010;hsa05412
50 --id_type Type of accession number ('uniprotID' or 'geneID')
51 --id_column Column containing accesion number of interest (ex : 'c1')
52 --header Boolean, TRUE if header FALSE if not
53 --ouput Output filename
54 --expression_values1 Column containing expression values (first condition)
55 --expression_values2 Column containing expression values (second condition)
56 --expression_values3 Column containing expression values (third condition)
57 --native_kegg TRUE : native KEGG graph, FALSE : Graphviz graph
58 --species KEGG species (hsa, mmu, ...)
59
60 Example:
61 ./PathView.R --input 'input.csv' --pathway_id '05412' --id_type 'uniprotID' --id_column 'c1' --header TRUE \n\n")
62
63 q(save="no")
64 }
65
66
67 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/pathview/args.Rda")
68 #load("/home/dchristiany/proteore_project/ProteoRE/tools/pathview/args.Rda")
69 parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
70 argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
71 args <- as.list(as.character(argsDF$V2))
72 names(args) <- argsDF$V1
73
74 return(args)
75 }
76
77 str2bool <- function(x){
78 if (any(is.element(c("t","true"),tolower(x)))){
79 return (TRUE)
80 }else if (any(is.element(c("f","false"),tolower(x)))){
81 return (FALSE)
82 }else{
83 return(NULL)
84 }
85 }
86
87 is.letter <- function(x) grepl("[[:alpha:]]", x)
88
89 #### hsa00010 -> 00010
90 remove_kegg_prefix <- function(x){
91 if (is.letter(substr(x,1,3))){
92 x <- substr(x,4,nchar(x))
93 }
94 return(x)
95 }
96
97 clean_bad_character <- function(string) {
98 string <- gsub("X","",string)
99 string <- gsub(" ","",string)
100 return(string)
101 }
102
103 args <- get_args()
104
105 ###save and load args in rda file for testing
106 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/pathview/args.Rda")
107 #load("/home/dchristiany/proteore_project/ProteoRE/tools/pathview/args.Rda")
108
109 ###setting variables
110 if (!is.null(args$pathways_id)) { ids <- sapply(rapply(strsplit(clean_bad_character(args$pathways_id),","),c), function(x) remove_kegg_prefix(x),USE.NAMES = FALSE)}
111 #if (!is.null(args$pathways_name)) {names <- as.vector(sapply(strsplit(args$pathways_name,","), function(x) concat_string(x),USE.NAMES = FALSE))}
112 if (!is.null(args$id_list)) {id_list <- as.vector(strsplit(clean_bad_character(args$id_list),","))}
113 id_type <- tolower(args$id_type)
114 ncol <- as.numeric(gsub("c", "" ,args$id_column))
115 header <- str2bool(args$header)
116 #output <- args$output
117 native_kegg <- str2bool(args$native_kegg)
118 species=args$species
119
120
121 #read input file or list
122 if (!is.null(args$input)){
123 tab <- read_file(args$input,header)
124 tab <- tab[!apply(is.na(tab) | tab == "", 1, all),] #delete empty rows
125 } else {
126 tab <- data.frame(id_list)
127 ncol=1
128 }
129
130 e1 <- as.numeric(gsub("c", "" ,args$expression_values1))
131 if (!is.null(args$expression_values1)) { colnames(tab)[e1] <- "e1" }
132 e2 <- as.numeric(gsub("c", "" ,args$expression_values2))
133 if (!is.null(args$expression_values2)) { colnames(tab)[e2] <- "e2" }
134 e3 <- as.numeric(gsub("c", "" ,args$expression_values3))
135 if (!is.null(args$expression_values3)) { colnames(tab)[e3] <- "e3" }
136
137
138 ##### map uniprotID to entrez geneID
139 if (id_type == "uniprotid") {
140
141 uniprotID = tab[,ncol]
142 mapped2geneID = id2eg(ids = uniprotID, category = "uniprot", org = "Hs", pkg.name = NULL)
143 geneID = mapped2geneID[,2]
144 tab = cbind(tab,geneID)
145
146 }else if (id_type == "geneid"){
147
148 colnames(tab)[ncol] <- "geneID"
149
150 }
151
152 geneID = tab$geneID[which(tab$geneID !="NA")]
153 geneID = gsub(" ","",geneID)
154 geneID = unlist(strsplit(geneID,"[;]"))
155
156
157 #### get hsa pathways list
158 #download.file(url = "http://rest.kegg.jp/link/pathway/hsa", destfile = "/home/dchristiany/proteore_project/ProteoRE/tools/pathview/geneID_to_hsa_pathways.csv")
159 #geneid_hsa_pathways <- read_file(path = "/home/dchristiany/proteore_project/ProteoRE/tools/pathview/geneID_to_hsa_pathways.csv",FALSE)
160 #names(geneid_hsa_pathways) <- c("geneID","pathway")
161
162 ##### build matrix to map on KEGG pathway (kgml : KEGG xml)
163 if (!is.null(args$expression_values1)&is.null(args$expression_values2)&is.null(args$expression_values3)){
164 mat <- as.data.frame(cbind(tab$e1)[which(!is.na(tab$geneID)),])
165 row.names(mat) <- tab$geneID[which(!is.na(tab$geneID))]
166 } else if (!is.null(args$expression_values1)&!is.null(args$expression_values2)&is.null(args$expression_values3)){
167 mat <- as.data.frame(cbind(tab$e1,tab$e2)[which(!is.na(tab$geneID)),])
168 row.names(mat) <- tab$geneID[which(!is.na(tab$geneID))]
169 }else if (!is.null(args$expression_values1)&!is.null(args$expression_values2)&!is.null(args$expression_values3)){
170 mat <- as.data.frame(cbind(tab$e1,tab$e2,tab$e3)[which(!is.na(tab$geneID)),])
171 row.names(mat) <- tab$geneID[which(!is.na(tab$geneID))]
172 } else {
173 mat <- geneID
174 }
175
176
177 #### simulation data test
178 #exp1 <- sim.mol.data(mol.type = c("gene", "gene.ko", "cpd")[1], id.type = NULL, species="hsa", discrete = FALSE, nmol = 161, nexp = 1, rand.seed=100)
179 #exp2 <- sim.mol.data(mol.type = c("gene", "gene.ko", "cpd")[1], id.type = NULL, species="hsa", discrete = FALSE, nmol = 161, nexp = 1, rand.seed=50)
180 #exp3 <- sim.mol.data(mol.type = c("gene", "gene.ko", "cpd")[1], id.type = NULL, species="hsa", discrete = FALSE, nmol = 161, nexp = 1, rand.seed=10)
181 #tab <- cbind(tab,exp1,exp2,exp3)
182
183 #write.table(tab, file='/home/dchristiany/proteore_project/ProteoRE/tools/pathview/Lacombe_sim_expression_data.tsv', quote=FALSE, sep='\t',row.names = FALSE)
184
185 #mat <- exp1[1:nrow(tab)]
186 #names(mat) <- geneID
187
188
189 #####mapping geneID (with or without expression values) on KEGG pathway
190 for (id in ids) {
191 pathview(gene.data = mat,
192 #gene.idtype = "geneID",
193 #cpd.data = uniprotID,
194 #cpd.idtype = "uniprot",
195 pathway.id = id,
196 #pathway.name = "",
197 species = species,
198 kegg.dir = ".",
199 gene.idtype = "entrez",
200 #gene.annotpkg = NULL,
201 #min.nnodes = 3,
202 kegg.native = native_kegg,
203 #map.null = TRUE,
204 #expand.node = FALSE,
205 #split.group = FALSE,
206 #map.symbol = TRUE,
207 #map.cpdname = TRUE,
208 #node.sum = "sum",
209 #discrete=list(gene=FALSE,cpd=FALSE),
210 #limit = list(gene = 1, cpd = 1),
211 #bins = list(gene = 10, cpd = 10),
212 #both.dirs = list(gene = T, cpd = T),
213 #trans.fun = list(gene = NULL, cpd = NULL),
214 #low = list(gene = "green", cpd = "blue"),
215 #mid = list(gene = "gray", cpd = "gray"),
216 #high = list(gene = "red", cpd = "yellow"),
217 #na.col = "transparent",
218 #sign.pos="bottomleft",
219 #key.pos="topright",
220 #new.signature=TRUE,
221 #rankdir="LB",
222 #cex=0.3,
223 #text.width=15,
224 #res=300,
225 pdf.size=c(9,9))
226 #is.signal=TRUE)
227 }
228
229 ########using keggview.native
230
231 #xml.file=system.file("extdata", "hsa00010.xml", package = "pathview")
232 #node.data=node.info("/home/dchristiany/hsa00010.xml")
233 #plot.data.gene=node.map(mol.data=test, node.data, node.types="gene")
234 #colors =node.color(plot.data = plot.data.gene[,1:9])