changeset 1:7004924a3686 draft

planemo upload commit 2e8375a0a037aaf4042cdeb5244228d3f9ca583b-dirty
author proteore
date Wed, 14 Nov 2018 05:46:35 -0500
parents 8883a7173cba
children 179bfc3c7938
files compute_kegg_pathways.R compute_kegg_pathways.xml
diffstat 2 files changed, 33 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/compute_kegg_pathways.R	Mon Nov 12 10:59:49 2018 -0500
+++ b/compute_kegg_pathways.R	Wed Nov 14 05:46:35 2018 -0500
@@ -82,22 +82,26 @@
   return (vector)
 }
 
+to_keggID <- function(id_list,id_type){                        #convert id_list to unique KEGG ids according to id_type
+  if (id_type == "ncbi-geneid") {                              #conversion is delegated to geneID_to_kegg
+    id_list <-  unique(geneID_to_kegg(id_list,args$species))   #args is not a parameter: species is read from outside the function
+  } else if (id_type=="uniprot"){
+    id_list <- unique(sapply(id_list, function(x) paste(id_type,":",x,sep=""),USE.NAMES = F))   #add the prefix "uniprot:" to each id
+    if (length(id_list)>250){                                  #more than 250 ids: query by chunks
+      id_list <- split(id_list, ceiling(seq_along(id_list)/250))   #chunks of at most 250 ids
+      id_list <- sapply(id_list, function(x) keggConv("genes",x))   #one keggConv call per chunk
+      id_list <- unique(unlist(id_list))                       #flatten the chunk results and remove duplicates
+    } else {
+      id_list <- unique(keggConv("genes", id_list))            #250 ids or fewer: single keggConv call
+    }
+  } else if (id_type=="kegg-id") {
+    id_list <- unique(id_list)                                 #no conversion, only remove duplicates
+  }                                                            #any other id_type: id_list is left unchanged
+  return (id_list)
+}
+
 kegg_mapping<- function(kegg_id_list,id_type,ref_ids) {
   
-    #convert to KEGG ID
-    #if (id_type!="kegg-id"){
-    #  id_list <- unique(sapply(id_list, function(x) paste(id_type,":",x,sep=""),USE.NAMES = F))
-    #  if (length(id_list)>250){
-    #    id_list <- split(id_list, ceiling(seq_along(id_list)/250))
-    #    id_list <- sapply(id_list, function(x) keggConv("genes",x))
-    #    kegg_id_list <- unique(unlist(id_list))
-    #  } else {
-    #  kegg_id_list <- unique(keggConv("genes", id_list))
-    #  }
-    #} else {
-    #  kegg_id_list <- unique(id_list)
-    #}
-  
     #mapping
     map<-lapply(ref_ids, is.element, unique(kegg_id_list))
     names(map) <- sapply(names(map), function(x) gsub("path:","",x),USE.NAMES = FALSE)    #remove the prefix "path:"
@@ -129,17 +133,16 @@
 
 ###setting variables
 header = str2bool(args$header)
-if (!is.null(args$id_list)) {id_list <- get_list_from_cp(args$id_list)}
-if (!is.null(args$input)) { 
+if (!is.null(args$id_list)) {id_list <- get_list_from_cp(args$id_list)}           #get ids from copy/paste input
+if (!is.null(args$input)) {                                                       #get ids from input file
   csv <- read_file(args$input,header)
   ncol <- as.numeric(gsub("c", "" ,args$id_column))
   id_list <- as.vector(csv[,ncol])
   id_list <- id_list[which(!is.na(id_list))]
 }
-if (args$id_type == "ncbi-geneid") {
-  id_list <-  geneID_to_kegg(id_list,args$species)
-}
 
+#convert to keggID if needed
+id_list <- to_keggID(id_list,args$id_type)
 
 #get pathways of species with associated KEGG ID genes
 pathways_list <- get_pathways_list(args$species)
--- a/compute_kegg_pathways.xml	Mon Nov 12 10:59:49 2018 -0500
+++ b/compute_kegg_pathways.xml	Wed Nov 14 05:46:35 2018 -0500
@@ -1,4 +1,4 @@
-<tool id="compute_kegg_pathways" name="KEGG pathways" version="2018.11.12">
+<tool id="compute_kegg_pathways" name="KEGG pathways" version="2018.11.14">
     <description>identification and coverage</description>
     <requirements>
         <requirement type="package" version="1.18.0">bioconductor-keggrest</requirement>
@@ -45,9 +45,10 @@
             </when>
         </conditional>
         <param name="id_type" type="select" label="select your identifiers type :">
+            <option value="ncbi-geneid" selected="true">Entrez gene ID</option>
             <option value="kegg-id">KEGG ID (genes)</option>
-            <!--option value="uniprot">Uniprot Accession number</option-->
-            <option value="ncbi-geneid">Entrez gene ID</option>
+            <option value="uniprot">Uniprot Accession number</option>
+            
         </param>
         <param name="species" type="select" label="Select species">
             <option value="hsa">Human (H. sapiens)</option>
@@ -85,7 +86,13 @@
 
 **Input:**
 
-Input can be either a list of KEGG genes IDs or a file containing multiple columns but with at least one column of KEGG genes IDs (e.g. hsa04970). If your input file contains other type of IDs, please use the ID_Converter tool to get KEGG genes ID.
+Input can be either a list of IDs or a file with multiple columns, at least one of which contains IDs (e.g. hsa04970). 
+The ID type can be Entrez gene ID, KEGG gene ID or UniProt accession number.
+
+.. class:: warningmark  
+
+If you use UniProt accession numbers, they will be converted to KEGG gene IDs; this step can be time-consuming.
+If your input file contains other types of IDs, please use the ID_Converter tool.
 
 **Output:**