Mercurial > repos > proteore > proteore_kegg_pathways_coverage
comparison compute_kegg_pathways.R @ 1:7004924a3686 draft
planemo upload commit 2e8375a0a037aaf4042cdeb5244228d3f9ca583b-dirty
| author | proteore |
|---|---|
| date | Wed, 14 Nov 2018 05:46:35 -0500 |
| parents | 8883a7173cba |
| children | |
comparison
equal
deleted
inserted
replaced
| 0:8883a7173cba | 1:7004924a3686 |
|---|---|
| 80 geneID_to_kegg <- function(vector,species){ | 80 geneID_to_kegg <- function(vector,species){ |
| 81 vector <- sapply(vector, function(x) paste(species,x,sep=":"),USE.NAMES = F) | 81 vector <- sapply(vector, function(x) paste(species,x,sep=":"),USE.NAMES = F) |
| 82 return (vector) | 82 return (vector) |
| 83 } | 83 } |
| 84 | 84 |
| | 85 to_keggID <- function(id_list,id_type){ |
| | 86 if (id_type == "ncbi-geneid") { |
| | 87 id_list <- unique(geneID_to_kegg(id_list,args$species)) |
| | 88 } else if (id_type=="uniprot"){ |
| | 89 id_list <- unique(sapply(id_list, function(x) paste(id_type,":",x,sep=""),USE.NAMES = F)) |
| | 90 if (length(id_list)>250){ |
| | 91 id_list <- split(id_list, ceiling(seq_along(id_list)/250)) |
| | 92 id_list <- sapply(id_list, function(x) keggConv("genes",x)) |
| | 93 id_list <- unique(unlist(id_list)) |
| | 94 } else { |
| | 95 id_list <- unique(keggConv("genes", id_list)) |
| | 96 } |
| | 97 } else if (id_type=="kegg-id") { |
| | 98 id_list <- unique(id_list) |
| | 99 } |
| | 100 return (id_list) |
| | 101 } |
| | 102 |
| 85 kegg_mapping<- function(kegg_id_list,id_type,ref_ids) { | 103 kegg_mapping<- function(kegg_id_list,id_type,ref_ids) { |
| 86 | |
| 87 #convert to KEGG ID | |
| 88 #if (id_type!="kegg-id"){ | |
| 89 # id_list <- unique(sapply(id_list, function(x) paste(id_type,":",x,sep=""),USE.NAMES = F)) | |
| 90 # if (length(id_list)>250){ | |
| 91 # id_list <- split(id_list, ceiling(seq_along(id_list)/250)) | |
| 92 # id_list <- sapply(id_list, function(x) keggConv("genes",x)) | |
| 93 # kegg_id_list <- unique(unlist(id_list)) | |
| 94 # } else { | |
| 95 # kegg_id_list <- unique(keggConv("genes", id_list)) | |
| 96 # } | |
| 97 #} else { | |
| 98 # kegg_id_list <- unique(id_list) | |
| 99 #} | |
| 100 | 104 |
| 101 #mapping | 105 #mapping |
| 102 map<-lapply(ref_ids, is.element, unique(kegg_id_list)) | 106 map<-lapply(ref_ids, is.element, unique(kegg_id_list)) |
| 103 names(map) <- sapply(names(map), function(x) gsub("path:","",x),USE.NAMES = FALSE) #remove the prefix "path:" | 107 names(map) <- sapply(names(map), function(x) gsub("path:","",x),USE.NAMES = FALSE) #remove the prefix "path:" |
| 104 | 108 |
| 127 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/kegg_pathways_identification/args.Rda") | 131 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/kegg_pathways_identification/args.Rda") |
| 128 #load("/home/dchristiany/proteore_project/ProteoRE/tools/kegg_pathways_identification/args.Rda") | 132 #load("/home/dchristiany/proteore_project/ProteoRE/tools/kegg_pathways_identification/args.Rda") |
| 129 | 133 |
| 130 ###setting variables | 134 ###setting variables |
| 131 header = str2bool(args$header) | 135 header = str2bool(args$header) |
| 132 if (!is.null(args$id_list)) {id_list <- get_list_from_cp(args$id_list)} | 136 if (!is.null(args$id_list)) {id_list <- get_list_from_cp(args$id_list)} #get ids from copy/paste input |
| 133 if (!is.null(args$input)) { | 137 if (!is.null(args$input)) { #get ids from input file |
| 134 csv <- read_file(args$input,header) | 138 csv <- read_file(args$input,header) |
| 135 ncol <- as.numeric(gsub("c", "" ,args$id_column)) | 139 ncol <- as.numeric(gsub("c", "" ,args$id_column)) |
| 136 id_list <- as.vector(csv[,ncol]) | 140 id_list <- as.vector(csv[,ncol]) |
| 137 id_list <- id_list[which(!is.na(id_list))] | 141 id_list <- id_list[which(!is.na(id_list))] |
| 138 } | 142 } |
| 139 if (args$id_type == "ncbi-geneid") { | |
| 140 id_list <- geneID_to_kegg(id_list,args$species) | |
| 141 } | |
| 142 | 143 |
| | 144 #convert to keggID if needed |
| | 145 id_list <- to_keggID(id_list,args$id_type) |
| 143 | 146 |
| 144 #get pathways of species with associated KEGG ID genes | 147 #get pathways of species with associated KEGG ID genes |
| 145 pathways_list <- get_pathways_list(args$species) | 148 pathways_list <- get_pathways_list(args$species) |
| 146 | 149 |
| 147 #mapping on pathways | 150 #mapping on pathways |
