Mercurial > repos > proteore > proteore_pathview_mapping
comparison PathView.R @ 0:097bb3ed2b4d draft
planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
| author | proteore |
|---|---|
| date | Wed, 22 Aug 2018 11:30:23 -0400 |
| parents | |
| children | b617d4bbebf8 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:097bb3ed2b4d |
|---|---|
| 1 #!/usr/bin/Rscript | |
| 2 #Rscript made for mapping genesID on KEGG pathway with Pathview package | |
| 3 #input : csv file containing ids (uniprot or geneID) to map, plus parameters | |
| 4 #output : KEGG pathway : jpeg or pdf file. | |
| 5 | |
| 6 suppressMessages(library("pathview")) | |
| 7 | |
| 8 read_file <- function(path,header){ | |
| 9 file <- try(read.table(path,header=header, sep="\t",stringsAsFactors = FALSE, quote=""),silent=TRUE) | |
| 10 if (inherits(file,"try-error")){ | |
| 11 stop("File not found !") | |
| 12 }else{ | |
| 13 return(file) | |
| 14 } | |
| 15 } | |
| 16 | |
| 17 ##### fuction to clean and concatenate pathway name (allow more flexibility for user input) | |
| 18 concat_string <- function(x){ | |
| 19 x <- gsub(" - .*","",x) | |
| 20 x <- gsub(" ","",x) | |
| 21 x <- gsub("-","",x) | |
| 22 x <- gsub("_","",x) | |
| 23 x <- gsub(",","",x) | |
| 24 x <- gsub("\\'","",x) | |
| 25 x <- gsub("\\(.*)","",x) | |
| 26 x <- gsub("\\/","",x) | |
| 27 x <- tolower(x) | |
| 28 return(x) | |
| 29 } | |
| 30 | |
| 31 | |
| 32 get_args <- function(){ | |
| 33 | |
| 34 ## Collect arguments | |
| 35 args <- commandArgs(TRUE) | |
| 36 | |
| 37 ## Default setting when no arguments passed | |
| 38 if(length(args) < 1) { | |
| 39 args <- c("--help") | |
| 40 } | |
| 41 | |
| 42 ## Help section | |
| 43 if("--help" %in% args) { | |
| 44 cat("Pathview R script | |
| 45 Arguments: | |
| 46 --help Print this test | |
| 47 --input path of the input file (must contains a colum of uniprot and/or geneID accession number) | |
| 48 --id_list list of ids to use, ',' separated | |
| 49 --pathways_id Id(s) of pathway(s) to use, if several, semicolon separated list : hsa00010;hsa05412 | |
| 50 --id_type Type of accession number ('uniprotID' or 'geneID') | |
| 51 --id_column Column containing accesion number of interest (ex : 'c1') | |
| 52 --header Boolean, TRUE if header FALSE if not | |
| 53 --ouput Output filename | |
| 54 --expression_values1 Column containing expression values (first condition) | |
| 55 --expression_values2 Column containing expression values (second condition) | |
| 56 --expression_values3 Column containing expression values (third condition) | |
| 57 --native_kegg TRUE : native KEGG graph, FALSE : Graphviz graph | |
| 58 --species KEGG species (hsa, mmu, ...) | |
| 59 | |
| 60 Example: | |
| 61 ./PathView.R --input 'input.csv' --pathway_id '05412' --id_type 'uniprotID' --id_column 'c1' --header TRUE \n\n") | |
| 62 | |
| 63 q(save="no") | |
| 64 } | |
| 65 | |
| 66 | |
| 67 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/pathview/args.Rda") | |
| 68 #load("/home/dchristiany/proteore_project/ProteoRE/tools/pathview/args.Rda") | |
| 69 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") | |
| 70 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) | |
| 71 args <- as.list(as.character(argsDF$V2)) | |
| 72 names(args) <- argsDF$V1 | |
| 73 | |
| 74 return(args) | |
| 75 } | |
| 76 | |
| 77 str2bool <- function(x){ | |
| 78 if (any(is.element(c("t","true"),tolower(x)))){ | |
| 79 return (TRUE) | |
| 80 }else if (any(is.element(c("f","false"),tolower(x)))){ | |
| 81 return (FALSE) | |
| 82 }else{ | |
| 83 return(NULL) | |
| 84 } | |
| 85 } | |
| 86 | |
| 87 is.letter <- function(x) grepl("[[:alpha:]]", x) | |
| 88 | |
| 89 #### hsa00010 -> 00010 | |
| 90 remove_kegg_prefix <- function(x){ | |
| 91 if (is.letter(substr(x,1,3))){ | |
| 92 x <- substr(x,4,nchar(x)) | |
| 93 } | |
| 94 return(x) | |
| 95 } | |
| 96 | |
| 97 clean_bad_character <- function(string) { | |
| 98 string <- gsub("X","",string) | |
| 99 string <- gsub(" ","",string) | |
| 100 return(string) | |
| 101 } | |
| 102 | |
| 103 args <- get_args() | |
| 104 | |
| 105 ###save and load args in rda file for testing | |
| 106 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/pathview/args.Rda") | |
| 107 #load("/home/dchristiany/proteore_project/ProteoRE/tools/pathview/args.Rda") | |
| 108 | |
| 109 ###setting variables | |
| 110 if (!is.null(args$pathways_id)) { ids <- sapply(rapply(strsplit(clean_bad_character(args$pathways_id),","),c), function(x) remove_kegg_prefix(x),USE.NAMES = FALSE)} | |
| 111 #if (!is.null(args$pathways_name)) {names <- as.vector(sapply(strsplit(args$pathways_name,","), function(x) concat_string(x),USE.NAMES = FALSE))} | |
| 112 if (!is.null(args$id_list)) {id_list <- as.vector(strsplit(clean_bad_character(args$id_list),","))} | |
| 113 id_type <- tolower(args$id_type) | |
| 114 ncol <- as.numeric(gsub("c", "" ,args$id_column)) | |
| 115 header <- str2bool(args$header) | |
| 116 #output <- args$output | |
| 117 native_kegg <- str2bool(args$native_kegg) | |
| 118 species=args$species | |
| 119 | |
| 120 | |
| 121 #read input file or list | |
| 122 if (!is.null(args$input)){ | |
| 123 tab <- read_file(args$input,header) | |
| 124 tab <- tab[!apply(is.na(tab) | tab == "", 1, all),] #delete empty rows | |
| 125 } else { | |
| 126 tab <- data.frame(id_list) | |
| 127 ncol=1 | |
| 128 } | |
| 129 | |
| 130 e1 <- as.numeric(gsub("c", "" ,args$expression_values1)) | |
| 131 if (!is.null(args$expression_values1)) { colnames(tab)[e1] <- "e1" } | |
| 132 e2 <- as.numeric(gsub("c", "" ,args$expression_values2)) | |
| 133 if (!is.null(args$expression_values2)) { colnames(tab)[e2] <- "e2" } | |
| 134 e3 <- as.numeric(gsub("c", "" ,args$expression_values3)) | |
| 135 if (!is.null(args$expression_values3)) { colnames(tab)[e3] <- "e3" } | |
| 136 | |
| 137 | |
| 138 ##### map uniprotID to entrez geneID | |
| 139 if (id_type == "uniprotid") { | |
| 140 | |
| 141 uniprotID = tab[,ncol] | |
| 142 mapped2geneID = id2eg(ids = uniprotID, category = "uniprot", org = "Hs", pkg.name = NULL) | |
| 143 geneID = mapped2geneID[,2] | |
| 144 tab = cbind(tab,geneID) | |
| 145 | |
| 146 }else if (id_type == "geneid"){ | |
| 147 | |
| 148 colnames(tab)[ncol] <- "geneID" | |
| 149 | |
| 150 } | |
| 151 | |
| 152 geneID = tab$geneID[which(tab$geneID !="NA")] | |
| 153 geneID = gsub(" ","",geneID) | |
| 154 geneID = unlist(strsplit(geneID,"[;]")) | |
| 155 | |
| 156 | |
| 157 #### get hsa pathways list | |
| 158 #download.file(url = "http://rest.kegg.jp/link/pathway/hsa", destfile = "/home/dchristiany/proteore_project/ProteoRE/tools/pathview/geneID_to_hsa_pathways.csv") | |
| 159 #geneid_hsa_pathways <- read_file(path = "/home/dchristiany/proteore_project/ProteoRE/tools/pathview/geneID_to_hsa_pathways.csv",FALSE) | |
| 160 #names(geneid_hsa_pathways) <- c("geneID","pathway") | |
| 161 | |
| 162 ##### build matrix to map on KEGG pathway (kgml : KEGG xml) | |
| 163 if (!is.null(args$expression_values1)&is.null(args$expression_values2)&is.null(args$expression_values3)){ | |
| 164 mat <- as.data.frame(cbind(tab$e1)[which(!is.na(tab$geneID)),]) | |
| 165 row.names(mat) <- tab$geneID[which(!is.na(tab$geneID))] | |
| 166 } else if (!is.null(args$expression_values1)&!is.null(args$expression_values2)&is.null(args$expression_values3)){ | |
| 167 mat <- as.data.frame(cbind(tab$e1,tab$e2)[which(!is.na(tab$geneID)),]) | |
| 168 row.names(mat) <- tab$geneID[which(!is.na(tab$geneID))] | |
| 169 }else if (!is.null(args$expression_values1)&!is.null(args$expression_values2)&!is.null(args$expression_values3)){ | |
| 170 mat <- as.data.frame(cbind(tab$e1,tab$e2,tab$e3)[which(!is.na(tab$geneID)),]) | |
| 171 row.names(mat) <- tab$geneID[which(!is.na(tab$geneID))] | |
| 172 } else { | |
| 173 mat <- geneID | |
| 174 } | |
| 175 | |
| 176 | |
| 177 #### simulation data test | |
| 178 #exp1 <- sim.mol.data(mol.type = c("gene", "gene.ko", "cpd")[1], id.type = NULL, species="hsa", discrete = FALSE, nmol = 161, nexp = 1, rand.seed=100) | |
| 179 #exp2 <- sim.mol.data(mol.type = c("gene", "gene.ko", "cpd")[1], id.type = NULL, species="hsa", discrete = FALSE, nmol = 161, nexp = 1, rand.seed=50) | |
| 180 #exp3 <- sim.mol.data(mol.type = c("gene", "gene.ko", "cpd")[1], id.type = NULL, species="hsa", discrete = FALSE, nmol = 161, nexp = 1, rand.seed=10) | |
| 181 #tab <- cbind(tab,exp1,exp2,exp3) | |
| 182 | |
| 183 #write.table(tab, file='/home/dchristiany/proteore_project/ProteoRE/tools/pathview/Lacombe_sim_expression_data.tsv', quote=FALSE, sep='\t',row.names = FALSE) | |
| 184 | |
| 185 #mat <- exp1[1:nrow(tab)] | |
| 186 #names(mat) <- geneID | |
| 187 | |
| 188 | |
| 189 #####mapping geneID (with or without expression values) on KEGG pathway | |
| 190 for (id in ids) { | |
| 191 pathview(gene.data = mat, | |
| 192 #gene.idtype = "geneID", | |
| 193 #cpd.data = uniprotID, | |
| 194 #cpd.idtype = "uniprot", | |
| 195 pathway.id = id, | |
| 196 #pathway.name = "", | |
| 197 species = species, | |
| 198 kegg.dir = ".", | |
| 199 gene.idtype = "entrez", | |
| 200 #gene.annotpkg = NULL, | |
| 201 #min.nnodes = 3, | |
| 202 kegg.native = native_kegg, | |
| 203 #map.null = TRUE, | |
| 204 #expand.node = FALSE, | |
| 205 #split.group = FALSE, | |
| 206 #map.symbol = TRUE, | |
| 207 #map.cpdname = TRUE, | |
| 208 #node.sum = "sum", | |
| 209 #discrete=list(gene=FALSE,cpd=FALSE), | |
| 210 #limit = list(gene = 1, cpd = 1), | |
| 211 #bins = list(gene = 10, cpd = 10), | |
| 212 #both.dirs = list(gene = T, cpd = T), | |
| 213 #trans.fun = list(gene = NULL, cpd = NULL), | |
| 214 #low = list(gene = "green", cpd = "blue"), | |
| 215 #mid = list(gene = "gray", cpd = "gray"), | |
| 216 #high = list(gene = "red", cpd = "yellow"), | |
| 217 #na.col = "transparent", | |
| 218 #sign.pos="bottomleft", | |
| 219 #key.pos="topright", | |
| 220 #new.signature=TRUE, | |
| 221 #rankdir="LB", | |
| 222 #cex=0.3, | |
| 223 #text.width=15, | |
| 224 #res=300, | |
| 225 pdf.size=c(9,9)) | |
| 226 #is.signal=TRUE) | |
| 227 } | |
| 228 | |
| 229 ########using keggview.native | |
| 230 | |
| 231 #xml.file=system.file("extdata", "hsa00010.xml", package = "pathview") | |
| 232 #node.data=node.info("/home/dchristiany/hsa00010.xml") | |
| 233 #plot.data.gene=node.map(mol.data=test, node.data, node.types="gene") | |
| 234 #colors =node.color(plot.data = plot.data.gene[,1:9]) |
