Mercurial > repos > proteore > proteore_pathview_mapping
view PathView.R @ 8:33c9bd75882a draft
planemo upload commit 1953798eb28b6e45c78342ed4c6a6a978d096fca
| author | proteore |
|---|---|
| date | Thu, 30 Aug 2018 06:12:51 -0400 |
| parents | b617d4bbebf8 |
| children | 560541195c3f |
line wrap: on
line source
#!/usr/bin/Rscript #Rscript made for mapping genesID on KEGG pathway with Pathview package #input : csv file containing ids (uniprot or geneID) to map, plus parameters #output : KEGG pathway : jpeg or pdf file. suppressMessages(library("pathview")) read_file <- function(path,header){ file <- try(read.table(path,header=header, sep="\t",stringsAsFactors = FALSE, quote=""),silent=TRUE) if (inherits(file,"try-error")){ stop("File not found !") }else{ return(file) } } ##### fuction to clean and concatenate pathway name (allow more flexibility for user input) concat_string <- function(x){ x <- gsub(" - .*","",x) x <- gsub(" ","",x) x <- gsub("-","",x) x <- gsub("_","",x) x <- gsub(",","",x) x <- gsub("\\'","",x) x <- gsub("\\(.*)","",x) x <- gsub("\\/","",x) x <- tolower(x) return(x) } get_args <- function(){ ## Collect arguments args <- commandArgs(TRUE) ## Default setting when no arguments passed if(length(args) < 1) { args <- c("--help") } ## Help section if("--help" %in% args) { cat("Pathview R script Arguments: --help Print this test --input path of the input file (must contains a colum of uniprot and/or geneID accession number) --id_list list of ids to use, ',' separated --pathways_id Id(s) of pathway(s) to use, if several, semicolon separated list : hsa00010;hsa05412 --id_type Type of accession number ('uniprotID' or 'geneID') --id_column Column containing accesion number of interest (ex : 'c1') --header Boolean, TRUE if header FALSE if not --ouput Output filename --expression_values1 Column containing expression values (first condition) --expression_values2 Column containing expression values (second condition) --expression_values3 Column containing expression values (third condition) --native_kegg TRUE : native KEGG graph, FALSE : Graphviz graph --species KEGG species (hsa, mmu, ...) --pathways_input Tab with pathways in a column, output format of find_pathways --pathway_col Column of pathways to use --header2 Boolean, TRUE if header FALSE if not Example: ./PathView.R --input 'input.csv' --pathway_id '05412' --id_type 'uniprotID' --id_column 'c1' --header TRUE \n\n") q(save="no") } parseArgs <- function(x) strsplit(sub("^--", "", x), "=") argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) args <- as.list(as.character(argsDF$V2)) names(args) <- argsDF$V1 return(args) } str2bool <- function(x){ if (any(is.element(c("t","true"),tolower(x)))){ return (TRUE) }else if (any(is.element(c("f","false"),tolower(x)))){ return (FALSE) }else{ return(NULL) } } is.letter <- function(x) grepl("[[:alpha:]]", x) #### hsa00010 -> 00010 remove_kegg_prefix <- function(x){ x = gsub(":","",x) if (substr(x,1,4) == 'path'){ x=substr(x,5,nchar(x)) } if (is.letter(substr(x,1,3))){ x <- substr(x,4,nchar(x)) } return(x) } clean_bad_character <- function(string) { string <- gsub("X","",string) string <- gsub(" ","",string) return(string) } args <- get_args() ###save and load args in rda file for testing #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/pathview/args.Rda") #load("/home/dchristiany/proteore_project/ProteoRE/tools/pathview/args.Rda") ###setting variables if (!is.null(args$pathways_id)) { ids <- sapply(rapply(strsplit(clean_bad_character(args$pathways_id),","),c), function(x) remove_kegg_prefix(x),USE.NAMES = FALSE) }else if (!is.null(args$pathways_input)){ header2 <- str2bool(args$header2) pathway_col <- as.numeric(gsub("c", "" ,args$pathway_col)) pathways_file = read_file(args$pathways_input,header2) ids <- sapply(rapply(strsplit(clean_bad_character(pathways_file[,pathway_col]),","),c), function(x) remove_kegg_prefix(x),USE.NAMES = FALSE) } #if (!is.null(args$pathways_name)) {names <- as.vector(sapply(strsplit(args$pathways_name,","), function(x) concat_string(x),USE.NAMES = FALSE))} if (!is.null(args$id_list)) {id_list <- as.vector(strsplit(clean_bad_character(args$id_list),","))} id_type <- tolower(args$id_type) ncol <- as.numeric(gsub("c", "" ,args$id_column)) header <- str2bool(args$header) #output <- args$output native_kegg <- str2bool(args$native_kegg) species=args$species #org list used in mapped2geneID org <- c('Hs','Mm') names(org) <- c('hsa','mmu') #read input file or list if (!is.null(args$input)){ tab <- read_file(args$input,header) tab <- data.frame(tab[which(tab[ncol]!=""),]) } else { tab <- data.frame(id_list) ncol=1 } e1 <- as.numeric(gsub("c", "" ,args$expression_values1)) if (!is.null(args$expression_values1)) { colnames(tab)[e1] <- "e1" } e2 <- as.numeric(gsub("c", "" ,args$expression_values2)) if (!is.null(args$expression_values2)) { colnames(tab)[e2] <- "e2" } e3 <- as.numeric(gsub("c", "" ,args$expression_values3)) if (!is.null(args$expression_values3)) { colnames(tab)[e3] <- "e3" } ##### map uniprotID to entrez geneID if (id_type == "uniprotid") { uniprotID = tab[,ncol] mapped2geneID = id2eg(ids = uniprotID, category = "uniprot", org = org[[species]], pkg.name = NULL) geneID = mapped2geneID[,2] tab = cbind(tab,geneID) }else if (id_type == "geneid"){ colnames(tab)[ncol] <- "geneID" } geneID = tab$geneID[which(tab$geneID !="NA")] geneID = gsub(" ","",geneID) geneID = unlist(strsplit(geneID,"[;]")) #### get hsa pathways list #download.file(url = "http://rest.kegg.jp/link/pathway/hsa", destfile = "/home/dchristiany/proteore_project/ProteoRE/tools/pathview/geneID_to_hsa_pathways.csv") #geneid_hsa_pathways <- read_file(path = "/home/dchristiany/proteore_project/ProteoRE/tools/pathview/geneID_to_hsa_pathways.csv",FALSE) #names(geneid_hsa_pathways) <- c("geneID","pathway") ##### build matrix to map on KEGG pathway (kgml : KEGG xml) if (!is.null(args$expression_values1)&is.null(args$expression_values2)&is.null(args$expression_values3)){ mat <- as.data.frame(cbind(tab$e1)[which(!is.na(tab$geneID)),]) row.names(mat) <- tab$geneID[which(!is.na(tab$geneID))] } else if (!is.null(args$expression_values1)&!is.null(args$expression_values2)&is.null(args$expression_values3)){ mat <- as.data.frame(cbind(tab$e1,tab$e2)[which(!is.na(tab$geneID)),]) row.names(mat) <- tab$geneID[which(!is.na(tab$geneID))] }else if (!is.null(args$expression_values1)&!is.null(args$expression_values2)&!is.null(args$expression_values3)){ mat <- as.data.frame(cbind(tab$e1,tab$e2,tab$e3)[which(!is.na(tab$geneID)),]) row.names(mat) <- tab$geneID[which(!is.na(tab$geneID))] } else { mat <- geneID } #### simulation data test #exp1 <- sim.mol.data(mol.type = c("gene", "gene.ko", "cpd")[1], id.type = NULL, species="hsa", discrete = FALSE, nmol = 161, nexp = 1, rand.seed=100) #exp2 <- sim.mol.data(mol.type = c("gene", "gene.ko", "cpd")[1], id.type = NULL, species="hsa", discrete = FALSE, nmol = 161, nexp = 1, rand.seed=50) #exp3 <- sim.mol.data(mol.type = c("gene", "gene.ko", "cpd")[1], id.type = NULL, species="hsa", discrete = FALSE, nmol = 161, nexp = 1, rand.seed=10) #tab <- cbind(tab,exp1,exp2,exp3) #write.table(tab, file='/home/dchristiany/proteore_project/ProteoRE/tools/pathview/Lacombe_sim_expression_data.tsv', quote=FALSE, sep='\t',row.names = FALSE) #mat <- exp1[1:nrow(tab)] #names(mat) <- geneID #####mapping geneID (with or without expression values) on KEGG pathway plot.col.key=!is.null(tab$e1) #if there's no exrepession data, we don't show the color key for (id in ids) { pathview(gene.data = mat, #gene.idtype = "geneID", #cpd.data = uniprotID, #cpd.idtype = "uniprot", pathway.id = id, #pathway.name = "", species = species, kegg.dir = ".", gene.idtype = "entrez", #gene.annotpkg = NULL, #min.nnodes = 3, kegg.native = native_kegg, #map.null = TRUE, #expand.node = FALSE, #split.group = FALSE, #map.symbol = TRUE, #map.cpdname = TRUE, #node.sum = "sum", #discrete=list(gene=FALSE,cpd=FALSE), #limit = list(gene = 1, cpd = 1), #bins = list(gene = 10, cpd = 10), #both.dirs = list(gene = T, cpd = T), #trans.fun = list(gene = NULL, cpd = NULL), #low = list(gene = "green", cpd = "blue"), #mid = list(gene = "gray", cpd = "gray"), #high = list(gene = "red", cpd = "yellow"), #na.col = "transparent", #sign.pos="bottomleft", plot.col.key = plot.col.key, #high = "lightgreen", #key.pos="topright", #new.signature=TRUE, #rankdir="LB", #cex=0.3, #text.width=15, #res=300, pdf.size=c(9,9)) #is.signal=TRUE) } ########using keggview.native #xml.file=system.file("extdata", "hsa00010.xml", package = "pathview") #node.data=node.info("/home/dchristiany/hsa00010.xml") #plot.data.gene=node.map(mol.data=test, node.data, node.types="gene") #colors =node.color(plot.data = plot.data.gene[,1:9])
