Mercurial > repos > proteore > proteore_pathview_mapping
changeset 12:9fe4a861601b draft
planemo upload commit 7e2bd28d27e13c402acd46500f64d5c117797aa7-dirty
| author | proteore |
|---|---|
| date | Fri, 09 Nov 2018 05:11:46 -0500 |
| parents | 6d5c0ff2b0bd |
| children | c59ec7fce7b3 |
| files | PathView.R Pathview.xml README.rst hsa_pathways.loc.sample kegg_pathways.loc.sample kegg_pathways_list_index.loc.sample kegg_pathways_visualization.R kegg_pathways_visualization.xml mmu_pathways.loc.sample tool-data/rno_pathways.loc tool_data_table_conf.xml.sample |
| diffstat | 11 files changed, 960 insertions(+), 1147 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/Rscript
# PathView.R (listed in this changeset as removed: a/PathView.R -> /dev/null)
#
# Rscript made for mapping gene IDs on KEGG pathways with the pathview package.
# input  : tab-separated file (or a pasted list) containing ids (uniprot or
#          geneID) to map, plus parameters given as --key=value
# output : one picture per KEGG pathway, drawn by pathview() with kegg.dir = "."
#          (png for the native KEGG graph, pdf for the Graphviz layout)

suppressMessages(library("pathview"))

# Read a tab-separated table. Any read failure stops the script; the original
# condition message is appended so that parse errors are not mistaken for a
# missing file.
read_file <- function(path, header) {
  tryCatch(
    read.table(path, header = header, sep = "\t",
               stringsAsFactors = FALSE, quote = ""),
    error = function(e) {
      stop("File not found ! (", path, ": ", conditionMessage(e), ")",
           call. = FALSE)
    }
  )
}

##### function to clean and concatenate pathway name (allow more flexibility
##### for user input). Only referenced from disabled code; kept for reference.
concat_string <- function(x) {
  x <- gsub(" - .*", "", x)
  x <- gsub(" ", "", x)
  x <- gsub("-", "", x)
  x <- gsub("_", "", x)
  x <- gsub(",", "", x)
  x <- gsub("\\'", "", x)
  x <- gsub("\\(.*)", "", x)
  x <- gsub("\\/", "", x)
  tolower(x)
}

# Collect the command line as a named list. Every argument must be written
# --key=value; with no argument (or --help) the usage text is printed and the
# script exits.
get_args <- function() {
  ## Collect arguments
  args <- commandArgs(TRUE)

  ## Default setting when no arguments passed
  if (length(args) < 1) {
    args <- c("--help")
  }

  ## Help section
  if ("--help" %in% args) {
    cat("Pathview R script
    Arguments (all given as --name=value):
      --help                 Print this text
      --input                path of the input file (must contain a column of uniprot and/or geneID accession number)
      --id_list              list of ids to use, ',' separated
      --pathways_id          Id(s) of pathway(s) to use, if several, comma separated list : hsa00010,hsa05412
      --id_type              Type of accession number ('uniprotID' or 'geneID')
      --id_column            Column containing accession number of interest (ex : 'c1')
      --header               Boolean, TRUE if header FALSE if not
      --output               Output filename (currently ignored)
      --expression_values1   Column containing expression values (first condition)
      --expression_values2   Column containing expression values (second condition)
      --expression_values3   Column containing expression values (third condition)
      --native_kegg          TRUE : native KEGG graph, FALSE : Graphviz graph
      --species              KEGG species (hsa, mmu, ...)
      --pathways_input       Tab with pathways in a column, output format of find_pathways
      --pathway_col          Column of pathways to use
      --header2              Boolean, TRUE if header FALSE if not

    Example:
      ./PathView.R --input=input.tsv --pathways_id=05412 --id_type=uniprotID --id_column=c1 --header=TRUE --native_kegg=TRUE --species=hsa \n\n")

    q(save = "no")
  }

  # Split on the FIRST "=" only, so that a value may itself contain "=".
  # Arguments without any "=" carry no value and are ignored.
  stripped <- sub("^--", "", args)
  stripped <- stripped[grepl("=", stripped, fixed = TRUE)]
  parsed <- as.list(sub("^[^=]*=", "", stripped))
  names(parsed) <- sub("=.*$", "", stripped)

  return(parsed)
}

# "t"/"true" -> TRUE, "f"/"false" -> FALSE (case-insensitive), anything else
# (including a missing argument) -> NULL.
str2bool <- function(x) {
  x <- tolower(x)
  if (any(x %in% c("t", "true"))) {
    return(TRUE)
  }
  if (any(x %in% c("f", "false"))) {
    return(FALSE)
  }
  NULL
}

is.letter <- function(x) grepl("[[:alpha:]]", x)

#### hsa00010 -> 00010, path:hsa00010 -> 00010, 00010 -> 00010
# Vectorised: drops ":" and then every leading letter ("path" and the organism
# code), whatever the length of the organism code. NA stays NA.
remove_kegg_prefix <- function(x) {
  sub("^[[:alpha:]]+", "", gsub(":", "", x))
}

# Strip "X" and blanks from pathway identifiers.
clean_bad_character <- function(string) {
  string <- gsub("X", "", string)
  string <- gsub(" ", "", string)
  return(string)
}

args <- get_args()

### setting variables
if (!is.null(args$pathways_id)) {
  raw_pathways <- args$pathways_id
} else if (!is.null(args$pathways_input)) {
  header2 <- str2bool(args$header2)
  pathway_col <- as.numeric(gsub("c", "", args$pathway_col))
  pathways_file <- read_file(args$pathways_input, header2)
  raw_pathways <- pathways_file[, pathway_col]
} else {
  stop("No pathway given: use --pathways_id or --pathways_input",
       call. = FALSE)
}
ids <- remove_kegg_prefix(
  unlist(strsplit(clean_bad_character(raw_pathways), ","))
)
# Drop empty entries and do not draw the same pathway twice.
ids <- unique(ids[!is.na(ids) & nzchar(ids)])

if (!is.null(args$id_list)) {
  # Only blanks are removed here: deleting "X" (as clean_bad_character does)
  # would corrupt accession numbers that contain that letter.
  id_list <- unlist(strsplit(gsub(" ", "", args$id_list), ","))
}
id_type <- tolower(args$id_type)
id_col <- as.numeric(gsub("c", "", args$id_column))
header <- str2bool(args$header)
native_kegg <- str2bool(args$native_kegg)
species <- args$species
# org list used in mapped2geneID
org <- c(hsa = "Hs", mmu = "Mm")

# read input file or list
if (!is.null(args$input)) {
  tab <- read_file(args$input, header)
  has_id <- !is.na(tab[[id_col]]) & tab[[id_col]] != ""
  tab <- tab[has_id, , drop = FALSE]
} else if (!is.null(args$id_list)) {
  tab <- data.frame(id = id_list, stringsAsFactors = FALSE)
  id_col <- 1
} else {
  stop("No identifiers given: use --input or --id_list", call. = FALSE)
}

# Rename the user-designated expression columns to e1, e2, e3.
expression_args <- paste0("expression_values", 1:3)
for (k in 1:3) {
  column_arg <- args[[expression_args[k]]]
  if (!is.null(column_arg)) {
    colnames(tab)[as.numeric(gsub("c", "", column_arg))] <- paste0("e", k)
  }
}

##### map uniprotID to entrez geneID
if (id_type == "uniprotid") {
  if (!species %in% names(org)) {
    stop("Uniprot mapping is only available for species: ",
         paste(names(org), collapse = ", "), call. = FALSE)
  }
  mapped2geneID <- id2eg(ids = tab[, id_col], category = "uniprot",
                         org = org[[species]], pkg.name = NULL)
  tab$geneID <- as.character(mapped2geneID[, 2])
} else if (id_type == "geneid") {
  colnames(tab)[id_col] <- "geneID"
} else {
  stop("Unknown --id_type '", args$id_type,
       "' (expected 'uniprotID' or 'geneID')", call. = FALSE)
}

geneID <- tab$geneID[which(tab$geneID != "NA")]
geneID <- gsub(" ", "", geneID)
geneID <- unlist(strsplit(geneID, "[;]"))

##### build matrix to map on KEGG pathway (kgml : KEGG xml)
# Expression data are used only when the columns were given as 1, 1+2 or
# 1+2+3; any other combination falls back to the plain id vector.
given <- !vapply(expression_args, function(k) is.null(args[[k]]), logical(1))
n_exp <- sum(given)
if (!all(given[seq_len(n_exp)])) {
  n_exp <- 0
}

if (n_exp > 0) {
  # Row names must be unique: keep the first row of each Entrez ID, otherwise
  # `row.names<-` stops with "duplicate 'row.names' are not allowed".
  usable <- !is.na(tab$geneID) & !duplicated(tab$geneID)
  mat <- tab[usable, paste0("e", seq_len(n_exp)), drop = FALSE]
  row.names(mat) <- as.character(tab$geneID[usable])
} else {
  mat <- geneID
}

##### mapping geneID (with or without expression values) on KEGG pathway
plot.col.key <- TRUE
low_color <- "green"
mid_color <- "#F3F781" # yellow
high_color <- "red"
if (n_exp == 0) {
  plot.col.key <- FALSE # no expression data: the color key is not shown
  high_color <- "#81BEF7" # blue
}

for (id in ids) {
  pathview(gene.data = mat,
           pathway.id = id,
           species = species,
           kegg.dir = ".",
           gene.idtype = "entrez",
           kegg.native = native_kegg,
           low = list(gene = low_color, cpd = "blue"),
           mid = list(gene = mid_color, cpd = "transparent"),
           high = list(gene = high_color, cpd = "yellow"),
           na.col = "#D8D8D8", # gray
           cpd.data = NULL,
           plot.col.key = plot.col.key,
           pdf.size = c(9, 9))
}
<!-- Pathview.xml (listed in this changeset as removed: a/Pathview.xml -> /dev/null)
     Galaxy wrapper that calls PathView.R to draw identifiers (and up to three
     expression columns) on KEGG pathways. -->
<tool id="Pathview" name="KEGG pathway mapping (pathview)" version="2018.09.14">
    <requirements>
        <requirement type="package" version="1.18.0">bioconductor-pathview</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        Rscript $__tool_directory__/PathView.R
        #if $input.ids == "text"
            --id_list="$input.txt"
        #else
            --input="$input.file"
            --id_column="$input.ncol"
            --header="$input.header"
        #end if
        #if $species.pathways.pathways_id != "pathways_file"
            --pathways_id="$species.pathways.pids"
        #else
            --pathways_input="$species.pathways.file"
            --header2="$species.pathways.header2"
            --pathway_col="$species.pathways.ncol2"
        #end if
        --id_type="$id_type"
        --native_kegg="$native"

        #if $input.ids=="file"
            #if $input.expression_values.nb_exp =="1"
                --expression_values1="$input.expression_values.e1"
            #else if $input.expression_values.nb_exp =="2"
                --expression_values1="$input.expression_values.e1"
                --expression_values2="$input.expression_values.e2"
            #else if $input.expression_values.nb_exp =="3"
                --expression_values1="$input.expression_values.e1"
                --expression_values2="$input.expression_values.e2"
                --expression_values3="$input.expression_values.e3"
            #end if
        #end if

        --species=${species.ref_file}

    ]]></command>
    <inputs>
        <!-- The species selects which pathway-name data table feeds the "pathways" conditional. -->
        <conditional name="species">
            <param name="ref_file" type="select" label="Select species" >
                <option value="hsa">Human (hsa)</option>
                <option value="mmu">Mouse (mmu)</option>
            </param>
            <when value="hsa">
                <conditional name="pathways">
                    <param name="pathways_id" type="select" label="Provide your pathway(s)" help="Enter KEGG pathway name(s) or KEGG pathway id(s)">
                        <option value="pathways_names">KEGG pathway name(s)</option>
                        <option value="pathways_ids">KEGG pathway id(s)</option>
                        <option value="pathways_file">KEGG pathway id(s) from file</option>
                    </param>
                    <when value="pathways_names">
                        <param name="pids" type="select" label="Select pathway(s)" multiple="true" help='You can select one or several pathway(s), you can write the beginning of your pathways to search using autocomplete'>
                            <options from_data_table="hsa_pathways">
                                <filter type="sort_by" column="1"/>
                                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
                            </options>
                        </param>
                    </when>
                    <when value="pathways_ids">
                        <param name="pids" type="text" label="Copy/paste your pathway id(s)" help='IDs must be separated by "," into the form field, for example: "00010,05412" or "hsa00010,hsa05412" or "path:hsa00010"'>
                            <sanitizer invalid_char=''>
                                <valid initial="string.printable">
                                    <remove value="&apos;"/>
                                </valid>
                                <mapping>
                                    <add source=" " target=""/>
                                </mapping>
                            </sanitizer>
                        </param>
                    </when>
                    <when value="pathways_file">
                        <param name="file" type="data" format="txt,tabular" label="Select a file with a column of pathways id" help="Pathway id format : 'path:hsa00010' or 'hsa00010' or '00010'" />
                        <param name="header2" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contains a header?" />
                        <param name="ncol2" type="text" value="c1" label="The column which contains your pathways ids" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />
                    </when>
                </conditional>
            </when>
            <when value="mmu">
                <conditional name="pathways">
                    <param name="pathways_id" type="select" label="Provide your pathway(s)" help="Enter KEGG pathway name(s) or KEGG pathway id(s)">
                        <option value="pathways_names">KEGG pathway name(s)</option>
                        <option value="pathways_ids">KEGG pathway id(s)</option>
                        <option value="pathways_file">KEGG pathway id(s) from file</option>
                    </param>
                    <when value="pathways_names">
                        <param name="pids" type="select" label="Select pathway(s)" multiple="true" help='You can select one or several pathway(s), you can write the beginning of your pathways to search using autocomplete'>
                            <options from_data_table="mmu_pathways">
                                <filter type="sort_by" column="1"/>
                                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
                            </options>
                        </param>
                    </when>
                    <when value="pathways_ids">
                        <param name="pids" type="text" label="Copy/paste your pathway id(s)" help='IDs must be separated by "," into the form field, for example: "path:mmu00053" or "mmu00053,mmu00340" or "00053"'>
                            <sanitizer invalid_char=''>
                                <valid initial="string.printable">
                                    <remove value="&apos;"/>
                                </valid>
                                <mapping>
                                    <add source=" " target=""/>
                                </mapping>
                            </sanitizer>
                        </param>
                    </when>
                    <when value="pathways_file">
                        <param name="file" type="data" format="txt,tabular" label="Select a file with a column of pathways id " help="Pathway id format : 'path:mmu00053' or 'mmu00053' or '00053'" />
                        <param name="header2" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?" />
                        <param name="ncol2" type="text" value="c1" label="The column which contains your pathways ids" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />
                    </when>
                </conditional>
            </when>
        </conditional>
        <param name="id_type" type="select" label="Select your identifiers type :">
            <option value="uniprotID">Uniprot Accession number</option>
            <option value="geneID">Entrez gene ID</option>
        </param>
        <conditional name="input" >
            <param name="ids" type="select" label="Provide your identifiers" help="Copy/paste or ID list from a file (e.g. table)" >
                <option value="text">Copy/paste your identifiers</option>
                <option value="file" selected="true">Input file containing your identifiers</option>
            </param>
            <when value="text" >
                <param name="txt" type="text" label="Copy/paste your identifiers" help='IDs must be separated by "," into the form field, for example: P31946,P62258' >
                    <sanitizer invalid_char=''>
                        <valid initial="string.printable">
                            <remove value="&apos;"/>
                        </valid>
                        <mapping initial="none">
                            <add source="&apos;" target="__sq__"/>
                        </mapping>
                    </sanitizer>
                </param>
            </when>
            <when value="file" >
                <param name="file" type="data" format="txt,tabular" label="Select a file that contains your list of IDs" help="" />
                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contains a header?" />
                <param name="ncol" type="text" value="c1" label="The column which contains your IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />
                <!-- Expression columns are only offered when the identifiers come from a file. -->
                <conditional name="expression_values">
                    <param name="nb_exp" type="select" label="How many expression values column in your file ?">
                        <option value="0" >0</option>
                        <option value="1" >1</option>
                        <option value="2" >2</option>
                        <option value="3" >3</option>
                    </param>
                    <when value="0">
                    </when>
                    <when value="1">
                        <param name="e1" type="text" value="" label="First column number of your expression data" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on'/>
                    </when>
                    <when value="2">
                        <param name="e1" type="text" value="" label="First column number of your expression data"/>
                        <param name="e2" type="text" value="" label="Second column number of your expression data"/>
                    </when>
                    <when value="3">
                        <param name="e1" type="text" value="" label="First column number of your expression data"/>
                        <param name="e2" type="text" value="" label="Second column number of your expression data"/>
                        <param name="e3" type="text" value="" label="Third column number of your expression data"/>
                    </when>
                </conditional>
            </when>
        </conditional>
        <param name="native" type="select" label="Choose the output graph format">
            <option value="true">KEGG graph (.png)</option>
            <option value="false">Graphviz layout engine (.pdf)</option>
        </param>
    </inputs>
    <outputs>
        <!-- One output per (graph format, identifier source) pair; "<" and ">" of the
             named regex group are written as entities so the attribute stays well-formed XML. -->
        <data name="graphviz_from_file" format="pdf" label="KEGG mapping with ${input.file.name}">
            <filter>native=="false" and input["ids"] == "file"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.pathview.*\.pdf" ext="pdf" visible="true" assign_primary_output="true"/>
        </data>
        <data name="kegg_from_file" format="png" label="KEGG mapping with ${input.file.name}">
            <filter>native=="true" and input["ids"] == "file"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.pathview.*\.png" ext="png" visible="true" assign_primary_output="true"/>
        </data>
        <data name="graphviz_from_list" format="pdf" label="KEGG mapping with given ids">
            <filter>native=="false" and input["ids"] == "text"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.pathview.*\.pdf" ext="pdf" visible="true" assign_primary_output="true"/>
        </data>
        <data name="kegg_from_list" format="png" label="KEGG mapping with given ids">
            <filter>native=="true" and input["ids"] == "text"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.pathview.*\.png" ext="png" visible="true" assign_primary_output="true"/>
        </data>
    </outputs>
    <tests>
        <test>
            <conditional name="input">
                <param name="ids" value="file"/>
                <param name="file" value="Lacombe_et_al_2017_OK.txt"/>
                <param name="header" value="true"/>
                <param name="ncol" value="c1"/>
            </conditional>
            <!-- "species" is a conditional: its selector is "ref_file" and "pathways" is nested inside it. -->
            <conditional name="species">
                <param name="ref_file" value="hsa"/>
                <conditional name="pathways">
                    <param name="pathways_id" value="pathways_ids"/>
                    <param name="pids" value="04514,05167,00010"/>
                </conditional>
            </conditional>
            <param name="id_type" value="uniprotID"/>
            <param name="native" value="true"/>
            <!-- NOTE(review): three <output> elements share one name; confirm whether
                 discovered datasets should be checked with <discovered_dataset> instead. -->
            <output name="kegg_from_file" file="hsa04514.pathview.png" compare="sim_size"/>
            <output name="kegg_from_file" file="hsa05167.pathview.png" compare="sim_size"/>
            <output name="kegg_from_file" file="hsa00010.pathview.png" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[

This tool maps a list of Uniprot Accession number or Entrez gene ID to KEGG pathway with pathview R package.

Select your identifier type : UniprotAC or Entrez gene ID

Select an input file containing ids in a column, set header and column number or copy/paste your ids.

You can import 1 to 3 column(s) of expression values if you are importing ids from a file.

Select a species of interest.

Select one or several pathways of interest from the dropdown menu or copy/paste KEGG pathway id(s) or import it from a file.

Select the graph format : KEGG (png) or graphviz (pdf)

Uniprot accession number converted to Entrez geneID or Entrez geneID are mapped to each selected pathways.

Output : One file (png or pdf) for each selected pathway.

-----

.. class:: infomark

**Authors**

David Christiany, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
    ]]></help>
    <citations>
        <citation type="doi">10.1093/nar/gkx372</citation>
        <citation type="bibtex">
@misc{pathview,
    author = {Weijun Luo},
    year = {2013},
    title = {pathview},
    url = {https://bioconductor.org/packages/release/bioc/html/pathview.html},
}</citation>
    </citations>
</tool>
--- a/README.rst Fri Sep 14 09:52:28 2018 -0400 +++ b/README.rst Fri Nov 09 05:11:46 2018 -0500 @@ -15,18 +15,52 @@ This tool map a list of Uniprot Accession number or Entrez gene ID to KEGG pathway with pathview R package. -Select your identifier type : UniprotAC or Entrez gene ID +You can map Entrez gene IDs / Uniprot accession numbers from three species : human, mouse and rat. -Select an input file containing ids in a column, set header and column number or copy/paste your ids. +If your input has another type of IDs, please use the ID_Converter tool. + +**Input:** + -You can import 1 to 3 column(s) of expression values if you are importing ids from a file. +- KEGG Pathways IDs to be used for mapping can be set by: + - choosing from the KEGG pathways name list + - giving a list (copy/paste) + - importing a list from a dataset (column) - output of KEGG pathways identification and coverage can be used (1st column) +- Genes/proteins ids to map can be either a list of Entrez genes IDs / Uniprot accession numbers or a file (tabular, tsv, txt) containing at least one column of Entrez genes IDs / Uniprot accession numbers. +- fold change values (up to three columns) from a dataset (same dataset as for Genes/proteins ids to map) -Select a species of interest. +You can see below an example of an input file with identifiers (uniprot_AC) and fold_change values. + +.. 
csv-table:: Simulated data + :header: "Uniprot_AC","Protein.name","Number_of_peptides","fc_values 1","fc_values 2","fc_values 3" + + "P15924","Desmoplakin","69","0.172302292051025","-0.757435966487116","0.0411240398990759" + "P02538","Keratin, type II cytoskeletal 6A","53","-0.988842456122076","0.654626325100182","-0.219153396366064" + "P02768","Serum albumin","44","-0.983493243315454","0.113752002761474","-0.645886132600729" + "P08779","Keratin, type I cytoskeletal 16","29","0.552302597284443","-0.329045605110646","2.10616106806788" -Select one or several pathways of interest from the dropdown menu or copy/paste KEGG pathway id(s) or import it from a file. +| -Select the graph format : KEGG (jpg) or graphviz (pdf) +**Output:** -Uniprot accession number converted to Entrez geneID or Entrez geneID are mapped to each selected pathways. +- a **collection dataset** named 'KEGG pathways map from <dataset>', one file (png or pdf) for each given pathway. +- a **summary text file** (.tsv) of the mapping(s) with the following columns + - **KEGG pathway ID**: KEGG pathway(s) used to map given genes/proteins ids + - **pathway name**: name(s) of KEGG pathway(s) used for mapping + - **nb of Uniprot_AC used** (only when Uniprot accession numbers are given): number of Uniprot accession numbers which will be converted to Entrez gene IDs + - **nb of Entrez gene ID used**: number of Entrez gene IDs used for mapping + - **nb of Entrez gene ID mapped**: number of Entrez gene IDs mapped on a given pathway + - **nb of Entrez gene ID in the pathway**: total number of Entrez gene IDs in a given pathway + - **ratio of Entrez gene ID mapped**: number of Entrez gene IDs mapped / total number of Entrez gene IDs in the pathway + - **Entrez gene ID mapped**: list of mapped Entrez gene IDs + - **uniprot_AC mapped** (only when Uniprot accession numbers are given): list of Uniprot accession numbers corresponding to the mapped Entrez gene IDs -Output : One file (png or pdf) for each selected pathway. 
\ No newline at end of file +----- + +.. class:: infomark + +**Database:** + +KEGG Pathways names list are from http://rest.kegg.jp/list/pathway/ + +User manual / Documentation: http://www.bioconductor.org/packages/release/bioc/html/pathview.html
--- a/hsa_pathways.loc.sample Fri Sep 14 09:52:28 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,330 +0,0 @@ -#value name -hsa00010 Glycolysis / Gluconeogenesis -hsa00020 Citrate cycle (TCA cycle) -hsa00030 Pentose phosphate pathway -hsa00040 Pentose and glucuronate interconversions -hsa00051 Fructose and mannose metabolism -hsa00052 Galactose metabolism -hsa00053 Ascorbate and aldarate metabolism -hsa00061 Fatty acid biosynthesis -hsa00062 Fatty acid elongation -hsa00071 Fatty acid degradation -hsa00072 Synthesis and degradation of ketone bodies -hsa00100 Steroid biosynthesis -hsa00120 Primary bile acid biosynthesis -hsa00130 Ubiquinone and other terpenoid-quinone biosynthesis -hsa00140 Steroid hormone biosynthesis -hsa00190 Oxidative phosphorylation -hsa00220 Arginine biosynthesis -hsa00230 Purine metabolism -hsa00232 Caffeine metabolism -hsa00240 Pyrimidine metabolism -hsa00250 Alanine, aspartate and glutamate metabolism -hsa00260 Glycine, serine and threonine metabolism -hsa00270 Cysteine and methionine metabolism -hsa00280 Valine, leucine and isoleucine degradation -hsa00290 Valine, leucine and isoleucine biosynthesis -hsa00310 Lysine degradation -hsa00330 Arginine and proline metabolism -hsa00340 Histidine metabolism -hsa00350 Tyrosine metabolism -hsa00360 Phenylalanine metabolism -hsa00380 Tryptophan metabolism -hsa00400 Phenylalanine, tyrosine and tryptophan biosynthesis -hsa00410 beta-Alanine metabolism -hsa00430 Taurine and hypotaurine metabolism -hsa00440 Phosphonate and phosphinate metabolism -hsa00450 Selenocompound metabolism -hsa00471 D-Glutamine and D-glutamate metabolism -hsa00472 D-Arginine and D-ornithine metabolism -hsa00480 Glutathione metabolism -hsa00500 Starch and sucrose metabolism -hsa00510 N-Glycan biosynthesis -hsa00511 Other glycan degradation -hsa00512 Mucin type O-glycan biosynthesis -hsa00514 Other types of O-glycan biosynthesis -hsa00515 Mannose type O-glycan biosynthesis -hsa00520 Amino sugar and nucleotide sugar 
metabolism -hsa00524 Neomycin, kanamycin and gentamicin biosynthesis -hsa00531 Glycosaminoglycan degradation -hsa00532 Glycosaminoglycan biosynthesis -hsa00533 Glycosaminoglycan biosynthesis -hsa00534 Glycosaminoglycan biosynthesis -hsa00561 Glycerolipid metabolism -hsa00562 Inositol phosphate metabolism -hsa00563 Glycosylphosphatidylinositol (GPI)-anchor biosynthesis -hsa00564 Glycerophospholipid metabolism -hsa00565 Ether lipid metabolism -hsa00590 Arachidonic acid metabolism -hsa00591 Linoleic acid metabolism -hsa00592 alpha-Linolenic acid metabolism -hsa00600 Sphingolipid metabolism -hsa00601 Glycosphingolipid biosynthesis -hsa00603 Glycosphingolipid biosynthesis -hsa00604 Glycosphingolipid biosynthesis -hsa00620 Pyruvate metabolism -hsa00630 Glyoxylate and dicarboxylate metabolism -hsa00640 Propanoate metabolism -hsa00650 Butanoate metabolism -hsa00670 One carbon pool by folate -hsa00730 Thiamine metabolism -hsa00740 Riboflavin metabolism -hsa00750 Vitamin B6 metabolism -hsa00760 Nicotinate and nicotinamide metabolism -hsa00770 Pantothenate and CoA biosynthesis -hsa00780 Biotin metabolism -hsa00785 Lipoic acid metabolism -hsa00790 Folate biosynthesis -hsa00830 Retinol metabolism -hsa00860 Porphyrin and chlorophyll metabolism -hsa00900 Terpenoid backbone biosynthesis -hsa00910 Nitrogen metabolism -hsa00920 Sulfur metabolism -hsa00970 Aminoacyl-tRNA biosynthesis -hsa00980 Metabolism of xenobiotics by cytochrome P450 -hsa00982 Drug metabolism -hsa00983 Drug metabolism -hsa01040 Biosynthesis of unsaturated fatty acids -hsa01100 Metabolic pathways -hsa01200 Carbon metabolism -hsa01210 2-Oxocarboxylic acid metabolism -hsa01212 Fatty acid metabolism -hsa01230 Biosynthesis of amino acids -hsa01521 EGFR tyrosine kinase inhibitor resistance -hsa01522 Endocrine resistance -hsa01523 Antifolate resistance -hsa01524 Platinum drug resistance -hsa02010 ABC transporters -hsa03008 Ribosome biogenesis in eukaryotes -hsa03010 Ribosome -hsa03013 RNA transport -hsa03015 mRNA 
surveillance pathway -hsa03018 RNA degradation -hsa03020 RNA polymerase -hsa03022 Basal transcription factors -hsa03030 DNA replication -hsa03040 Spliceosome -hsa03050 Proteasome -hsa03060 Protein export -hsa03320 PPAR signaling pathway -hsa03410 Base excision repair -hsa03420 Nucleotide excision repair -hsa03430 Mismatch repair -hsa03440 Homologous recombination -hsa03450 Non-homologous end-joining -hsa03460 Fanconi anemia pathway -hsa04010 MAPK signaling pathway -hsa04012 ErbB signaling pathway -hsa04014 Ras signaling pathway -hsa04015 Rap1 signaling pathway -hsa04020 Calcium signaling pathway -hsa04022 cGMP-PKG signaling pathway -hsa04024 cAMP signaling pathway -hsa04060 Cytokine-cytokine receptor interaction -hsa04062 Chemokine signaling pathway -hsa04064 NF-kappa B signaling pathway -hsa04066 HIF-1 signaling pathway -hsa04068 FoxO signaling pathway -hsa04070 Phosphatidylinositol signaling system -hsa04071 Sphingolipid signaling pathway -hsa04072 Phospholipase D signaling pathway -hsa04080 Neuroactive ligand-receptor interaction -hsa04110 Cell cycle -hsa04114 Oocyte meiosis -hsa04115 p53 signaling pathway -hsa04120 Ubiquitin mediated proteolysis -hsa04122 Sulfur relay system -hsa04130 SNARE interactions in vesicular transport -hsa04136 Autophagy -hsa04137 Mitophagy -hsa04140 Autophagy -hsa04141 Protein processing in endoplasmic reticulum -hsa04142 Lysosome -hsa04144 Endocytosis -hsa04145 Phagosome -hsa04146 Peroxisome -hsa04150 mTOR signaling pathway -hsa04151 PI3K-Akt signaling pathway -hsa04152 AMPK signaling pathway -hsa04210 Apoptosis -hsa04211 Longevity regulating pathway -hsa04213 Longevity regulating pathway -hsa04215 Apoptosis -hsa04216 Ferroptosis -hsa04217 Necroptosis -hsa04218 Cellular senescence -hsa04260 Cardiac muscle contraction -hsa04261 Adrenergic signaling in cardiomyocytes -hsa04270 Vascular smooth muscle contraction -hsa04310 Wnt signaling pathway -hsa04330 Notch signaling pathway -hsa04340 Hedgehog signaling pathway -hsa04350 TGF-beta 
signaling pathway -hsa04360 Axon guidance -hsa04370 VEGF signaling pathway -hsa04371 Apelin signaling pathway -hsa04380 Osteoclast differentiation -hsa04390 Hippo signaling pathway -hsa04392 Hippo signaling pathway -hsa04510 Focal adhesion -hsa04512 ECM-receptor interaction -hsa04514 Cell adhesion molecules (CAMs) -hsa04520 Adherens junction -hsa04530 Tight junction -hsa04540 Gap junction -hsa04550 Signaling pathways regulating pluripotency of stem cells -hsa04610 Complement and coagulation cascades -hsa04611 Platelet activation -hsa04612 Antigen processing and presentation -hsa04614 Renin-angiotensin system -hsa04620 Toll-like receptor signaling pathway -hsa04621 NOD-like receptor signaling pathway -hsa04622 RIG-I-like receptor signaling pathway -hsa04623 Cytosolic DNA-sensing pathway -hsa04625 C-type lectin receptor signaling pathway -hsa04630 Jak-STAT signaling pathway -hsa04640 Hematopoietic cell lineage -hsa04650 Natural killer cell mediated cytotoxicity -hsa04657 IL-17 signaling pathway -hsa04658 Th1 and Th2 cell differentiation -hsa04659 Th17 cell differentiation -hsa04660 T cell receptor signaling pathway -hsa04662 B cell receptor signaling pathway -hsa04664 Fc epsilon RI signaling pathway -hsa04666 Fc gamma R-mediated phagocytosis -hsa04668 TNF signaling pathway -hsa04670 Leukocyte transendothelial migration -hsa04672 Intestinal immune network for IgA production -hsa04710 Circadian rhythm -hsa04713 Circadian entrainment -hsa04714 Thermogenesis -hsa04720 Long-term potentiation -hsa04721 Synaptic vesicle cycle -hsa04722 Neurotrophin signaling pathway -hsa04723 Retrograde endocannabinoid signaling -hsa04724 Glutamatergic synapse -hsa04725 Cholinergic synapse -hsa04726 Serotonergic synapse -hsa04727 GABAergic synapse -hsa04728 Dopaminergic synapse -hsa04730 Long-term depression -hsa04740 Olfactory transduction -hsa04742 Taste transduction -hsa04744 Phototransduction -hsa04750 Inflammatory mediator regulation of TRP channels -hsa04810 Regulation of actin 
cytoskeleton -hsa04910 Insulin signaling pathway -hsa04911 Insulin secretion -hsa04912 GnRH signaling pathway -hsa04913 Ovarian steroidogenesis -hsa04914 Progesterone-mediated oocyte maturation -hsa04915 Estrogen signaling pathway -hsa04916 Melanogenesis -hsa04917 Prolactin signaling pathway -hsa04918 Thyroid hormone synthesis -hsa04919 Thyroid hormone signaling pathway -hsa04920 Adipocytokine signaling pathway -hsa04921 Oxytocin signaling pathway -hsa04922 Glucagon signaling pathway -hsa04923 Regulation of lipolysis in adipocytes -hsa04924 Renin secretion -hsa04925 Aldosterone synthesis and secretion -hsa04926 Relaxin signaling pathway -hsa04927 Cortisol synthesis and secretion -hsa04928 Parathyroid hormone synthesis, secretion and action -hsa04930 Type II diabetes mellitus -hsa04931 Insulin resistance -hsa04932 Non-alcoholic fatty liver disease (NAFLD) -hsa04933 AGE-RAGE signaling pathway in diabetic complications -hsa04934 Cushing's syndrome -hsa04940 Type I diabetes mellitus -hsa04950 Maturity onset diabetes of the young -hsa04960 Aldosterone-regulated sodium reabsorption -hsa04961 Endocrine and other factor-regulated calcium reabsorption -hsa04962 Vasopressin-regulated water reabsorption -hsa04964 Proximal tubule bicarbonate reclamation -hsa04966 Collecting duct acid secretion -hsa04970 Salivary secretion -hsa04971 Gastric acid secretion -hsa04972 Pancreatic secretion -hsa04973 Carbohydrate digestion and absorption -hsa04974 Protein digestion and absorption -hsa04975 Fat digestion and absorption -hsa04976 Bile secretion -hsa04977 Vitamin digestion and absorption -hsa04978 Mineral absorption -hsa04979 Cholesterol metabolism -hsa05010 Alzheimer's disease -hsa05012 Parkinson's disease -hsa05014 Amyotrophic lateral sclerosis (ALS) -hsa05016 Huntington's disease -hsa05020 Prion diseases -hsa05030 Cocaine addiction -hsa05031 Amphetamine addiction -hsa05032 Morphine addiction -hsa05033 Nicotine addiction -hsa05034 Alcoholism -hsa05100 Bacterial invasion of epithelial 
cells -hsa05110 Vibrio cholerae infection -hsa05120 Epithelial cell signaling in Helicobacter pylori infection -hsa05130 Pathogenic Escherichia coli infection -hsa05131 Shigellosis -hsa05132 Salmonella infection -hsa05133 Pertussis -hsa05134 Legionellosis -hsa05140 Leishmaniasis -hsa05142 Chagas disease (American trypanosomiasis) -hsa05143 African trypanosomiasis -hsa05144 Malaria -hsa05145 Toxoplasmosis -hsa05146 Amoebiasis -hsa05150 Staphylococcus aureus infection -hsa05152 Tuberculosis -hsa05160 Hepatitis C -hsa05161 Hepatitis B -hsa05162 Measles -hsa05163 Human cytomegalovirus infection -hsa05164 Influenza A -hsa05165 Human papillomavirus infection -hsa05166 HTLV-I infection -hsa05167 Kaposi's sarcoma-associated herpesvirus infection -hsa05168 Herpes simplex infection -hsa05169 Epstein-Barr virus infection -hsa05200 Pathways in cancer -hsa05202 Transcriptional misregulation in cancer -hsa05203 Viral carcinogenesis -hsa05204 Chemical carcinogenesis -hsa05205 Proteoglycans in cancer -hsa05206 MicroRNAs in cancer -hsa05210 Colorectal cancer -hsa05211 Renal cell carcinoma -hsa05212 Pancreatic cancer -hsa05213 Endometrial cancer -hsa05214 Glioma -hsa05215 Prostate cancer -hsa05216 Thyroid cancer -hsa05217 Basal cell carcinoma -hsa05218 Melanoma -hsa05219 Bladder cancer -hsa05220 Chronic myeloid leukemia -hsa05221 Acute myeloid leukemia -hsa05222 Small cell lung cancer -hsa05223 Non-small cell lung cancer -hsa05224 Breast cancer -hsa05225 Hepatocellular carcinoma -hsa05226 Gastric cancer -hsa05230 Central carbon metabolism in cancer -hsa05231 Choline metabolism in cancer -hsa05310 Asthma -hsa05320 Autoimmune thyroid disease -hsa05321 Inflammatory bowel disease (IBD) -hsa05322 Systemic lupus erythematosus -hsa05323 Rheumatoid arthritis -hsa05330 Allograft rejection -hsa05332 Graft-versus-host disease -hsa05340 Primary immunodeficiency -hsa05410 Hypertrophic cardiomyopathy (HCM) -hsa05412 Arrhythmogenic right ventricular cardiomyopathy (ARVC) -hsa05414 Dilated 
cardiomyopathy (DCM) -hsa05416 Viral myocarditis -hsa05418 Fluid shear stress and atherosclerosis
--- a/kegg_pathways.loc.sample Fri Sep 14 09:52:28 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -hsa_pathways Human (hsa) hsa tool-data/hsa_pathways.tsv -mmu_pathways Mouse (mmu) mmu tool-data/mmu_pathways.tsv \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kegg_pathways_list_index.loc.sample Fri Nov 09 05:11:46 2018 -0500 @@ -0,0 +1,4 @@ +#value name path +hsa Homo sapiens tool-data/hsa_pathways.loc +mmu Mus musculus tool-data/mmu_pathways.loc +rno Rattus norvegicus tool-data/rno_pathways.loc
#!/usr/bin/Rscript
# From the changeset diff: new file b/kegg_pathways_visualization.R
# (Fri Nov 09 05:11:46 2018 -0500).
#
# Rscript made for mapping gene IDs on KEGG pathways with the pathview package.
# input  : tabular file (or pasted list) of ids (Uniprot accession number or
#          Entrez gene ID) to map, plus parameters
# output : one KEGG pathway map per pathway (png or pdf, written by pathview in
#          the working directory) and a tab-separated mapping summary.

# Read a tab-separated file into a data.frame.
#   path   : path of the file to read
#   header : TRUE when the first line holds the column names
# Stops with the underlying reason when the file cannot be read.
read_file <- function(path, header) {
  tryCatch(
    read.csv(path, header = header, sep = "\t", stringsAsFactors = FALSE,
             quote = "\"", check.names = FALSE),
    error = function(e) {
      stop("Unable to read file '", path, "': ", conditionMessage(e),
           call. = FALSE)
    }
  )
}

##### function to clean and concatenate pathway name (allow more flexibility
##### for user input). Not called by this script; kept for compatibility.
concat_string <- function(x) {
  x <- gsub(" - .*", "", x)
  x <- gsub(" ", "", x)
  x <- gsub("-", "", x)
  x <- gsub("_", "", x)
  x <- gsub(",", "", x)
  x <- gsub("\\'", "", x)
  x <- gsub("\\(.*)", "", x)
  x <- gsub("\\/", "", x)
  tolower(x)
}

# Return the output suffix (pathway name) for a KEGG pathway number.
#   pathways_list : table with the pathway id (e.g. "hsa00010") in column 1 and
#                   the pathway name in column 2
#   species, id   : pasted together to build the pathway id
# "/" becomes "or", spaces become "_", and the result is cut to 50 characters.
# Falls back to "pathview" (pathview's own default suffix) when the pathway is
# not listed, instead of failing on a zero-length condition.
get_suffix <- function(pathways_list, species, id) {
  pathway_id <- paste0(species, id)
  pathway_name <- as.character(
    pathways_list[which(pathways_list[, 1] == pathway_id), 2]
  )
  pathway_name <- pathway_name[!is.na(pathway_name)]
  if (length(pathway_name) == 0) {
    return("pathview")
  }
  suffix <- gsub("/", "or", pathway_name[1])
  suffix <- gsub(" ", "_", suffix)
  substr(suffix, 1, 50)
}

# Convert "t"/"true"/"f"/"false" (any case) to a logical.
# Returns NULL when the value is missing or not recognised, so callers must
# test the result with isTRUE()/is.null() rather than use it in `if` directly.
str2bool <- function(x) {
  value <- tolower(x)
  if (any(c("t", "true") %in% value)) {
    TRUE
  } else if (any(c("f", "false") %in% value)) {
    FALSE
  } else {
    NULL
  }
}

# TRUE when x contains at least one letter (kept for compatibility).
is.letter <- function(x) grepl("[[:alpha:]]", x)

#### "hsa00010" -> "00010", "path:hsa00010" -> "00010", "00010" -> "00010"
# Vectorised. Strips every leading letter, so prefixes that are not exactly
# three letters long (e.g. "ko00010") no longer lose a digit.
remove_kegg_prefix <- function(x) {
  x <- gsub(":", "", x, fixed = TRUE)
  sub("^[[:alpha:]]+", "", x)
}

# Remove every "X" from the string.
# NOTE(review): presumably these are placeholder characters introduced by the
# Galaxy text sanitizer - confirm against the tool wrapper.
clean_bad_character <- function(string) {
  gsub("X", "", string)
}

# Split a pasted list of ids into a character vector.
# Accepted separators: space, tab, newline, comma and semicolon (the help text
# of this script documents ',' and ';' separated lists).
# Isoform suffixes (e.g. "-2") are removed.
get_list_from_cp <- function(list) {
  if (length(list) == 0) {
    return(character(0))
  }
  items <- strsplit(list, "[ \t\n,;]+")[[1]]
  items <- items[items != ""] # remove empty entries
  gsub("-.+", "", items) # remove isoform accession number (e.g. "-2")
}

# Parse a column designation such as "c1" into the integer 1.
#   x      : value given on the command line
#   option : option name, only used in the error message
parse_column <- function(x, option) {
  index <- suppressWarnings(as.integer(sub("^c", "", x, ignore.case = TRUE)))
  if (length(index) != 1 || is.na(index) || index < 1) {
    stop("Invalid value for ", option, ": expected a column such as 'c1'",
         call. = FALSE)
  }
  index
}

# Expand a column of Entrez gene IDs where one cell may hold several IDs
# separated by ";" (spaces ignored).
# Returns a list with two aligned vectors:
#   row  : index of the originating row in `gene_column`
#   gene : one gene ID per entry
# Rows with NA or empty cells are skipped. Keeping the row index is what lets
# fold change values stay attached to the right gene after the split.
expand_gene_ids <- function(gene_column) {
  gene_column <- gsub(" ", "", as.character(gene_column))
  keep <- which(!is.na(gene_column) & gene_column != "")
  per_row <- strsplit(gene_column[keep], ";", fixed = TRUE)
  genes <- as.character(unlist(per_row))
  rows <- rep(keep, lengths(per_row))
  valid <- genes != ""
  list(row = rows[valid], gene = genes[valid])
}

# Build the `gene.data` argument given to pathview.
#   tab         : input table (fold change columns already numeric)
#   expanded    : result of expand_gene_ids()
#   fold_change : integer indices of the fold change columns (may be empty)
# Without fold change: the character vector of gene IDs.
# With one column: a numeric vector named by gene ID.
# With several columns: a numeric matrix with gene IDs as row names.
# Only the first occurrence of each gene ID is kept.
build_gene_data <- function(tab, expanded, fold_change) {
  if (length(fold_change) == 0) {
    return(expanded$gene)
  }
  first <- !duplicated(expanded$gene)
  values <- as.matrix(tab[expanded$row[first], fold_change, drop = FALSE])
  if (ncol(values) == 1) {
    single <- as.vector(values[, 1])
    names(single) <- expanded$gene[first]
    return(single)
  }
  rownames(values) <- expanded$gene[first]
  colnames(values) <- paste0("e", seq_along(fold_change))
  values
}

# Return a summary of one pathview mapping as a named character vector.
#   pv.out        : value returned by pathview(); only `plot.data.gene`
#                   (`kegg.names`, `all.mapped`) is read
#   species, id   : pasted together to build the KEGG pathway id
#   id_type       : "geneid" or "uniprotid"
#   pathways_list : table with pathway id in column 1 and name in column 2
#   geneID        : Entrez gene IDs used for the mapping
#   uniprotID     : Uniprot accession numbers used (uniprotid only)
#   mapped2geneID : two-column matrix, Uniprot accession / Entrez gene ID(s)
#                   (uniprotid only)
mapping_summary <- function(pv.out, species, id, id_type, pathways_list,
                            geneID, uniprotID = NULL, mapped2geneID = NULL) {
  gene_nodes <- pv.out$plot.data.gene
  has_hit <- !is.na(gene_nodes$all.mapped) & gene_nodes$all.mapped != ""
  # unique(): the pathway total below is counted on unique names as well,
  # otherwise the ratio could exceed 100 %.
  mapped <- unique(as.character(gene_nodes$kegg.names[has_hit]))
  nb_mapped <- length(mapped)
  nb_kegg_id <- length(unique(gene_nodes$kegg.names))
  ratio <- round((nb_mapped / nb_kegg_id) * 100, 2)
  if (is.nan(ratio)) {
    ratio <- ""
  }
  pathway_id <- paste0(species, id)
  pathway_name <- as.character(
    pathways_list[which(pathways_list[, 1] == pathway_id), 2]
  )[1]

  if (id_type == "geneid") {
    # Values are listed in the same order as the column names below
    # (mapped first, then pathway total).
    summary_row <- c(pathway_id, pathway_name, length(unique(geneID)),
                     nb_mapped, nb_kegg_id, ratio,
                     paste(mapped, collapse = ";"))
    names(summary_row) <- c("KEGG pathway ID", "pathway name",
                            "nb of Entrez gene ID used",
                            "nb of Entrez gene ID mapped",
                            "nb of Entrez gene ID in the pathway",
                            "ratio of Entrez gene ID mapped (%)",
                            "Entrez gene ID mapped")
  } else if (id_type == "uniprotid") {
    # A Uniprot accession counts as mapped when any of its gene IDs is mapped.
    genes_per_ac <- strsplit(gsub(" ", "", as.character(mapped2geneID[, 2])),
                             ";", fixed = TRUE)
    ac_hit <- vapply(genes_per_ac, function(g) any(g %in% mapped), logical(1))
    summary_row <- c(pathway_id, pathway_name, length(unique(uniprotID)),
                     length(unique(geneID)), nb_mapped, nb_kegg_id, ratio,
                     paste(mapped, collapse = ";"),
                     paste(mapped2geneID[ac_hit, 1], collapse = ";"))
    names(summary_row) <- c("KEGG pathway ID", "pathway name",
                            "nb of Uniprot_AC used",
                            "nb of Entrez gene ID used",
                            "nb of Entrez gene ID mapped",
                            "nb of Entrez gene ID in the pathway",
                            "ratio of Entrez gene ID mapped (%)",
                            "Entrez gene ID mapped", "uniprot_AC mapped")
  } else {
    stop("Unsupported id_type '", id_type,
         "': expected 'uniprotID' or 'geneID'", call. = FALSE)
  }
  summary_row
}

# Turn c("--key=value", ...) into a named list of character values.
# Only the first "=" separates key and value, so values may contain "=".
# An option given without "=" gets the empty string as value.
parse_args <- function(raw_args) {
  stripped <- sub("^--", "", raw_args)
  eq_pos <- regexpr("=", stripped, fixed = TRUE)
  has_value <- eq_pos > 0
  keys <- ifelse(has_value, substr(stripped, 1, eq_pos - 1), stripped)
  values <- ifelse(has_value,
                   substr(stripped, eq_pos + 1, nchar(stripped)), "")
  parsed <- as.list(values)
  names(parsed) <- keys
  parsed
}

# Print the command line help.
print_help <- function() {
  cat("Pathview R script
    Arguments:
      --help                  Print this text
      --input                 path of the input file (must contains a column of uniprot and/or geneID accession number)
      --id_list               list of ids to use, ',' separated
      --pathways_id           Id(s) of pathway(s) to use, if several, semicolon separated list : hsa00010;hsa05412
      --id_type               Type of accession number ('uniprotID' or 'geneID')
      --id_column             Column containing accession number of interest (ex : 'c1')
      --header                Boolean, TRUE if header FALSE if not
      --output                Output filename
      --fold_change_data      Boolean, TRUE if fold change values are provided
      --fold_change_col       Column(s) containing fold change values (comma separated)
      --native_kegg           TRUE : native KEGG graph, FALSE : Graphviz graph
      --species               KEGG species (hsa, mmu, ...)
      --pathways_input        Tab with pathways in a column, output format of find_pathways
      --pathway_col           Column of pathways to use
      --header2               Boolean, TRUE if header FALSE if not
      --pathways_list         path of file containing the species pathways list (hsa_pathways.loc, mmu_pathways.loc, ...)

    Example:
      ./kegg_pathways_visualization.R --input='input.csv' --pathways_id='05412' --id_type='uniprotID' --id_column='c1' --header=TRUE \n\n")
}

# Collect the command line arguments as a named list.
# Prints the help and quits when no argument or "--help" is given.
get_args <- function() {
  args <- commandArgs(TRUE)
  if (length(args) < 1) {
    args <- c("--help")
  }
  if ("--help" %in% args) {
    print_help()
    q(save = "no")
  }
  parse_args(args)
}

# Entry point: read the parameters, build the gene data, call pathview for
# every requested pathway and write the summary table.
main <- function() {
  options(warn = -1) # TURN OFF WARNINGS (as in the original script)

  args <- get_args()
  suppressMessages(library("pathview"))

  pathways_list <- read_file(args$pathways_list, FALSE)

  ### pathway ids, either pasted or read from a column of a file
  if (!is.null(args$pathways_id)) {
    ids <- get_list_from_cp(clean_bad_character(args$pathways_id))
  } else if (!is.null(args$pathways_input)) {
    header2 <- isTRUE(str2bool(args$header2))
    pathway_col <- parse_column(args$pathway_col, "--pathway_col")
    pathways_file <- read_file(args$pathways_input, header2)
    cells <- clean_bad_character(as.character(pathways_file[[pathway_col]]))
    # a cell may hold several comma separated ids; drop surrounding blanks
    ids <- gsub("[[:space:]]", "", unlist(strsplit(cells, ",")))
  } else {
    stop("No pathway given: use --pathways_id or --pathways_input",
         call. = FALSE)
  }
  ids <- remove_kegg_prefix(ids)
  ids <- unique(ids[!is.na(ids) & ids != ""])

  id_type <- tolower(args$id_type)
  if (length(id_type) != 1 || !id_type %in% c("uniprotid", "geneid")) {
    stop("--id_type must be 'uniprotID' or 'geneID'", call. = FALSE)
  }
  native_kegg <- str2bool(args$native_kegg)
  if (is.null(native_kegg)) {
    native_kegg <- TRUE # pathview's own default for kegg.native
  }
  fold_change_data <- isTRUE(str2bool(args$fold_change_data))

  # organism codes used by id2eg, indexed by KEGG species code
  org <- c(hsa = "Hs", mmu = "Mm", rno = "Rn")
  species <- args$species
  if (length(species) != 1 || !species %in% names(org)) {
    stop("--species must be one of: ", paste(names(org), collapse = ", "),
         call. = FALSE)
  }

  ### read input file or pasted list
  if (!is.null(args$input)) {
    header <- isTRUE(str2bool(args$header))
    id_col <- parse_column(args$id_column, "--id_column")
    tab <- read_file(args$input, header)
    # drop = FALSE keeps a data.frame even for a single-column input
    tab <- tab[which(tab[[id_col]] != ""), , drop = FALSE]
  } else if (!is.null(args$id_list)) {
    tab <- data.frame(id_list = get_list_from_cp(args$id_list),
                      stringsAsFactors = FALSE)
    id_col <- 1L
  } else {
    stop("No identifier given: use --input or --id_list", call. = FALSE)
  }

  ### fold change columns (3 at most), converted to double
  fold_change <- integer(0)
  if (fold_change_data) {
    fold_change <- suppressWarnings(as.integer(
      unlist(strsplit(gsub("c", "", args$fold_change_col), ","))
    ))
    fold_change <- fold_change[!is.na(fold_change)]
    if (length(fold_change) == 0) {
      stop("--fold_change_col is required when --fold_change_data is true",
           call. = FALSE)
    }
    if (length(fold_change) > 3) {
      fold_change <- fold_change[1:3]
    }
    if (any(fold_change < 1 | fold_change > ncol(tab))) {
      stop("--fold_change_col refers to a column missing from the input file",
           call. = FALSE)
    }
    for (fc_col in fold_change) {
      # accept "," as decimal separator
      tab[[fc_col]] <- as.double(gsub(",", ".", as.character(tab[[fc_col]])))
    }
  }

  ##### map uniprotID to entrez geneID
  uniprotID <- NULL
  mapped2geneID <- NULL
  if (id_type == "uniprotid") {
    uniprotID <- as.character(tab[[id_col]])
    mapped2geneID <- id2eg(ids = uniprotID, category = "uniprot",
                           org = org[[species]], pkg.name = NULL)
    if (is.null(dim(mapped2geneID))) {
      # a single mapping may come back as a plain vector
      mapped2geneID <- matrix(mapped2geneID, ncol = 2)
    }
    # Align on the accession (column 1) instead of relying on row order, so
    # duplicated or unmapped accessions cannot shift the gene IDs.
    gene_column <- mapped2geneID[match(uniprotID, mapped2geneID[, 1]), 2]
  } else {
    gene_column <- tab[[id_col]]
  }

  expanded <- expand_gene_ids(gene_column)
  geneID <- expanded$gene

  ##### build data to map on KEGG pathway (kgml : KEGG xml)
  mat <- build_gene_data(tab, expanded, fold_change)

  ##### mapping geneID (with or without expression values) on KEGG pathway
  low_color <- "green"
  mid_color <- "#F3F781" # yellow
  # without expression data the color key is hidden and hits are drawn in blue
  plot.col.key <- fold_change_data
  high_color <- if (fold_change_data) "red" else "#81BEF7"

  # create graph(s) and collect one summary row per pathway
  summaries <- list()
  for (id in ids) {
    suffix <- get_suffix(pathways_list, species, id)
    pv.out <- suppressMessages(pathview(
      gene.data = mat,
      gene.idtype = "entrez",
      pathway.id = id,
      species = species,
      kegg.dir = ".",
      out.suffix = suffix,
      kegg.native = native_kegg,
      low = list(gene = low_color, cpd = "blue"),
      mid = list(gene = mid_color, cpd = "transparent"),
      high = list(gene = high_color, cpd = "yellow"),
      na.col = "#D8D8D8", # gray
      cpd.data = NULL,
      plot.col.key = plot.col.key,
      pdf.size = c(9, 9)
    ))

    if (is.list(pv.out)) {
      summaries[[length(summaries) + 1]] <- mapping_summary(
        pv.out, species, id, id_type, pathways_list,
        geneID, uniprotID, mapped2geneID
      )
    }
  }

  if (length(summaries) == 0) {
    stop("pathview produced no result for the requested pathway(s)",
         call. = FALSE)
  }

  summary_table <- do.call(rbind, summaries)
  # convert "" and " " to NA
  summary_table[!is.na(summary_table) & summary_table %in% c("", " ")] <- NA

  # text file output
  write.table(summary_table, file = args$output, quote = FALSE, sep = "\t",
              row.names = FALSE, col.names = TRUE)
}

# Run only when executed with Rscript, not when the file is source()d.
if (sys.nframe() == 0L) {
  main()
}
<!-- From the changeset diff: new file b/kegg_pathways_visualization.xml (Fri Nov 09 05:11:46 2018 -0500) -->
<!-- Galaxy wrapper around kegg_pathways_visualization.R: maps Uniprot / Entrez gene ids on KEGG pathways with pathview. -->
<tool id="kegg_pathways_visualization" name="KEGG pathways" version="2018.11.08">
    <description>map visualisation (PathView)</description>
    <requirements>
        <requirement type="package" version="1.18.0">bioconductor-pathview</requirement>
    </requirements>
    <!-- Every path and value is quoted so that directories containing spaces do not split the command line. -->
    <command detect_errors="exit_code"><![CDATA[
        Rscript "$__tool_directory__/kegg_pathways_visualization.R"
        #if $input.ids == "text"
            --id_list="$input.txt"
        #else
            --input="$input.file"
            --id_column="$input.ncol"
            --header="$input.header"
        #end if
        #if $species.pathways.pathways_id != "pathways_file"
            --pathways_id="$species.pathways.pids"
        #else
            --pathways_input="$species.pathways.file"
            --header2="$species.pathways.header2"
            --pathway_col="$species.pathways.ncol2"
        #end if
        --id_type="$id_type"
        --native_kegg="$native"

        #if $input.ids=="file" and $input.foldchange.fc=="true"
            --fold_change_data="$input.foldchange.fc"
            --fold_change_col="$input.foldchange.fc_col"
        #else
            --fold_change_data="false"
        #end if

        --species="${species.ref_file}"
        ## list(...) keeps the lookup subscriptable when filter() returns an iterator (Python 3)
        --pathways_list="$__tool_directory__/${ list( filter( lambda x: str( x[0] ) == str( $species.ref_file ), $__app__.tool_data_tables['kegg_pathways_list_index'].get_fields() ) )[0][-1] }"
        --output="$text_output"
    ]]></command>
    <inputs>
        <conditional name="species">
            <param name="ref_file" type="select" label="Select species" >
                <option value="hsa">Human (H. sapiens)</option>
                <option value="mmu">Mouse (M. musculus)</option>
                <option value="rno">Rat (R. norvegicus)</option>
            </param>
            <when value="hsa">
                <conditional name="pathways">
                    <param name="pathways_id" type="select" label="Provide your pathway(s)" help="Enter KEGG pathway name(s) or KEGG pathway id(s)">
                        <option value="pathways_names">KEGG pathway name(s)</option>
                        <option value="pathways_ids">KEGG pathway id(s)</option>
                        <option value="pathways_file">KEGG pathway id(s) from file</option>
                    </param>
                    <when value="pathways_names">
                        <param name="pids" type="select" label="Select pathway(s)" multiple="true" help='You can select one or several pathway(s), you can write the beginning of your pathways to search using autocomplete'>
                            <options from_data_table="hsa_pathways">
                                <filter type="sort_by" column="1"/>
                                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
                            </options>
                        </param>
                    </when>
                    <when value="pathways_ids">
                        <param name="pids" type="text" label="Copy/paste your pathway id(s)" help='IDs must be separated by tab, space or carriage return into the form field, for example: "hsa00010 hsa05412"'>
                            <sanitizer invalid_char=''>
                                <valid initial="string.printable">
                                    <remove value="'"/>
                                </valid>
                                <mapping>
                                    <add source="&#x20;" target=""/>
                                </mapping>
                            </sanitizer>
                        </param>
                    </when>
                    <when value="pathways_file">
                        <param name="file" type="data" format="txt,tabular" label="Select a file with a column of pathways id" help="Pathway id format : 'path:hsa00010' or 'hsa00010' or '00010'" />
                        <param name="header2" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain a header?" />
                        <param name="ncol2" type="text" value="c1" label="The column which contains your pathways ids" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />
                    </when>
                </conditional>
            </when>
            <when value="mmu">
                <conditional name="pathways">
                    <param name="pathways_id" type="select" label="Provide your pathway(s)" help="Enter KEGG pathway name(s) or KEGG pathway id(s)">
                        <option value="pathways_names">KEGG pathway name(s)</option>
                        <option value="pathways_ids">KEGG pathway id(s)</option>
                        <option value="pathways_file">KEGG pathway id(s) from file</option>
                    </param>
                    <when value="pathways_names">
                        <param name="pids" type="select" label="Select pathway(s)" multiple="true" help='You can select one or several pathway(s), you can write the beginning of your pathways to search using autocomplete'>
                            <options from_data_table="mmu_pathways">
                                <filter type="sort_by" column="1"/>
                                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
                            </options>
                        </param>
                    </when>
                    <when value="pathways_ids">
                        <param name="pids" type="text" label="Copy/paste your pathway id(s)" help='IDs must be separated by tab, space or carriage return into the form field, for example: "mmu00053 mmu00340"'>
                            <sanitizer invalid_char=''>
                                <valid initial="string.printable">
                                    <remove value="'"/>
                                </valid>
                                <mapping>
                                    <add source="&#x20;" target=""/>
                                </mapping>
                            </sanitizer>
                        </param>
                    </when>
                    <when value="pathways_file">
                        <param name="file" type="data" format="txt,tabular" label="Select a file with a column of pathways id" help="Pathway id format : 'path:mmu00053' or 'mmu00053' or '00053'" />
                        <param name="header2" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain a header?" />
                        <param name="ncol2" type="text" value="c1" label="The column which contains your pathways ids" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />
                    </when>
                </conditional>
            </when>
            <when value="rno">
                <conditional name="pathways">
                    <param name="pathways_id" type="select" label="Provide your pathway(s)" help="Enter KEGG pathway name(s) or KEGG pathway id(s)">
                        <option value="pathways_names">KEGG pathway name(s)</option>
                        <option value="pathways_ids">KEGG pathway id(s)</option>
                        <option value="pathways_file">KEGG pathway id(s) from file</option>
                    </param>
                    <when value="pathways_names">
                        <param name="pids" type="select" label="Select pathway(s)" multiple="true" help='You can select one or several pathway(s), you can write the beginning of your pathways to search using autocomplete'>
                            <options from_data_table="rno_pathways">
                                <filter type="sort_by" column="1"/>
                                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
                            </options>
                        </param>
                    </when>
                    <when value="pathways_ids">
                        <!-- examples use rat (rno) identifiers, matching the selected species -->
                        <param name="pids" type="text" label="Copy/paste your pathway id(s)" help='IDs must be separated by tab, space or carriage return into the form field, for example: "rno00010 rno05412"'>
                            <sanitizer invalid_char=''>
                                <valid initial="string.printable">
                                    <remove value="'"/>
                                </valid>
                                <mapping>
                                    <add source="&#x20;" target=""/>
                                </mapping>
                            </sanitizer>
                        </param>
                    </when>
                    <when value="pathways_file">
                        <param name="file" type="data" format="txt,tabular" label="Select a file with a column of pathways id" help="Pathway id format : 'path:rno00010' or 'rno00010' or '00010'" />
                        <param name="header2" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain a header?" />
                        <param name="ncol2" type="text" value="c1" label="The column which contains your pathways ids" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />
                    </when>
                </conditional>
            </when>
        </conditional>
        <param name="id_type" type="select" label="Select your identifiers type :">
            <option value="uniprotID">Uniprot Accession number</option>
            <option value="geneID">Entrez gene ID</option>
            <!--option value="keggid">KEGG genes ID</option-->
        </param>
        <conditional name="input" >
            <param name="ids" type="select" label="Enter your identifiers (Uniprot AC or Entrez gene ID)" help="Copy/paste or ID list from a file (e.g. table)" >
                <option value="text">Copy/paste your identifiers</option>
                <option value="file" selected="true">Input file containing your identifiers</option>
            </param>
            <when value="text" >
                <param name="txt" type="text" label="Copy/paste your identifiers" help='IDs must be separated by tab, space or carriage return into the form field, for example: P31946 P62258' >
                    <sanitizer invalid_char=''>
                        <valid initial="string.printable">
                            <remove value="'"/>
                        </valid>
                        <mapping initial="none">
                            <add source="'" target="__sq__"/>
                        </mapping>
                    </sanitizer>
                </param>
            </when>
            <when value="file" >
                <param name="file" type="data" format="txt,tabular" label="Select a file that contains your list of IDs" help="" />
                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain a header?" />
                <param name="ncol" type="text" value="c1" label="The column which contains your IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />
                <conditional name="foldchange" >
                    <!-- the initial state of a boolean param is given by "checked" -->
                    <param name="fc" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Do you have fold change values to represent on the graph ?"/>
                    <when value="true">
                        <param name="fc_col" type="text" label="Please enter column(s) number of fold change data separated by ','. 3 columns max" help="For example : c1,c3,c4"/>
                    </when>
                    <when value="false"/>
                </conditional>
            </when>
        </conditional>
        <param name="native" type="select" label="Choose the output graphical format">
            <option value="true">KEGG map (.png)</option>
            <option value="false">Graphviz layout engine (.pdf)</option>
        </param>
    </inputs>
    <outputs>
        <data name="text_output" format="tsv" label="KEGG pathways visualization text output"/>
        <!-- "&lt;" / "&gt;" must be escaped inside attribute values for the file to be well-formed XML -->
        <collection type="list" label="KEGG pathways map from ${input.file.name}" name="graphviz_output_from_file">
            <filter>native=="false" and input["ids"] == "file"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+\..*)\.pdf" ext="pdf" />
        </collection>
        <collection type="list" label="KEGG pathways map from ${input.file.name}" name="kegg_graph_output_from_file">
            <filter>native=="true" and input["ids"] == "file"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+\..*)\.png" ext="png"/>
        </collection>
        <collection type="list" label="KEGG pathways map" name="graphviz_output_from_list">
            <filter>native=="false" and input["ids"] == "text"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+\..*)\.pdf" ext="pdf" />
        </collection>
        <collection type="list" label="KEGG pathways map" name="kegg_graph_output_from_list">
            <filter>native=="true" and input["ids"] == "text"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+\..*)\.png" ext="png" />
        </collection>
    </outputs>
    <tests>
        <!-- Parameters follow the nesting of the inputs section and the checked outputs are the declared ones. -->
        <test>
            <conditional name="species">
                <param name="ref_file" value="hsa"/>
                <conditional name="pathways">
                    <param name="pathways_id" value="pathways_ids"/>
                    <param name="pids" value="hsa04514 hsa05167 hsa00010"/>
                </conditional>
            </conditional>
            <param name="id_type" value="uniprotID"/>
            <conditional name="input">
                <param name="ids" value="file"/>
                <param name="file" value="Lacombe_et_al_2017_OK.txt"/>
                <param name="header" value="true"/>
                <param name="ncol" value="c1"/>
                <conditional name="foldchange">
                    <param name="fc" value="false"/>
                </conditional>
            </conditional>
            <param name="native" value="true"/>
            <output name="text_output">
                <assert_contents>
                    <has_text text="KEGG pathway ID"/>
                    <has_text text="hsa04514"/>
                    <has_text text="hsa05167"/>
                    <has_text text="hsa00010"/>
                </assert_contents>
            </output>
            <output_collection name="kegg_graph_output_from_file" type="list" count="3"/>
        </test>
    </tests>
    <help><![CDATA[
This tool maps a list of Uniprot Accession numbers or Entrez gene IDs to KEGG pathways with the pathview R package.

You can map Entrez gene IDs / Uniprot accession numbers from three species: human, mouse and rat.

If your input has another type of IDs, please use the ID_Converter tool.

**Input:**

- KEGG Pathways IDs to be used for mapping can be set by:

  - choosing from the KEGG pathways name list
  - giving a list (copy/paste)
  - importing a list from a dataset (column) - output of KEGG pathways identification and coverage can be used (1st column)

- Genes/proteins ids to map can be either a list of Entrez genes IDs / Uniprot accession number or a file (tabular, tsv, txt) containing at least one column of Entrez genes IDs / Uniprot accession number.
- fold change values (up to three columns) from a dataset (same dataset as for Genes/proteins ids to map)

You can see below an example of an input file with identifiers (uniprot_AC) and fold_change values.

.. csv-table:: Simulated data
   :header: "Uniprot_AC","Protein.name","Number_of_peptides","fc_values 1","fc_values 2","fc_values 3"

   "P15924","Desmoplakin","69","0.172302292051025","-0.757435966487116","0.0411240398990759"
   "P02538","Keratin, type II cytoskeletal 6A","53","-0.988842456122076","0.654626325100182","-0.219153396366064"
   "P02768","Serum albumin","44","-0.983493243315454","0.113752002761474","-0.645886132600729"
   "P08779","Keratin, type I cytoskeletal 16","29","0.552302597284443","-0.329045605110646","2.10616106806788"

|

**Output:**

- a **collection dataset** named 'KEGG pathways map from <dataset>', one file (png or pdf) for each given pathway.
- a **summary text file** (.tsv) of the mapping(s) with the following columns

  - **KEGG pathway ID**: KEGG pathway(s) used to map given genes/proteins ids
  - **pathway name**: name(s) of KEGG pathway(s) used for mapping
  - **nb of Uniprot_AC used** (only when Uniprot accession number is given): number of Uniprot accession number which will be converted to Entrez genes IDs
  - **nb of Entrez gene ID used**: number of Entrez gene IDs used for mapping
  - **nb of Entrez gene ID mapped**: number of Entrez gene IDs mapped on a given pathway
  - **nb of Entrez gene ID in the pathway**: number total of Entrez gene IDs in a given pathway
  - **ratio of Entrez gene ID mapped**: number of Entrez gene IDs mapped / number total of Entrez gene IDs
  - **Entrez gene ID mapped**: list of mapped Entrez gene IDs
  - **uniprot_AC mapped** (only when Uniprot accession number is given): list of Uniprot accession number corresponding to the mapped Entrez gene IDs

-----

.. class:: infomark

**Database:**

KEGG Pathways names list are from http://rest.kegg.jp/list/pathway/

User manual / Documentation: http://www.bioconductor.org/packages/release/bioc/html/pathview.html


-----

.. class:: infomark

**Authors**

David Christiany, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
    ]]></help>
    <citations>
        <citation type="doi">10.1093/nar/gkx372</citation>
        <citation type="bibtex">
@misc{luo2013pathview,
    author = {Weijun Luo},
    year = {2013},
    title = {pathview},
    url = {https://bioconductor.org/packages/release/bioc/html/pathview.html},
}</citation>
    </citations>
</tool>
--- a/mmu_pathways.loc.sample Fri Sep 14 09:52:28 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,326 +0,0 @@ -#value name -mmu00010 Glycolysis / Gluconeogenesis -mmu00020 Citrate cycle (TCA cycle) -mmu00030 Pentose phosphate pathway -mmu00040 Pentose and glucuronate interconversions -mmu00051 Fructose and mannose metabolism -mmu00052 Galactose metabolism -mmu00053 Ascorbate and aldarate metabolism -mmu00061 Fatty acid biosynthesis -mmu00062 Fatty acid elongation -mmu00071 Fatty acid degradation -mmu00072 Synthesis and degradation of ketone bodies -mmu00100 Steroid biosynthesis -mmu00120 Primary bile acid biosynthesis -mmu00130 Ubiquinone and other terpenoid-quinone biosynthesis -mmu00140 Steroid hormone biosynthesis -mmu00190 Oxidative phosphorylation -mmu00220 Arginine biosynthesis -mmu00230 Purine metabolism -mmu00232 Caffeine metabolism -mmu00240 Pyrimidine metabolism -mmu00250 Alanine, aspartate and glutamate metabolism -mmu00260 Glycine, serine and threonine metabolism -mmu00270 Cysteine and methionine metabolism -mmu00280 Valine, leucine and isoleucine degradation -mmu00290 Valine, leucine and isoleucine biosynthesis -mmu00310 Lysine degradation -mmu00330 Arginine and proline metabolism -mmu00340 Histidine metabolism -mmu00350 Tyrosine metabolism -mmu00360 Phenylalanine metabolism -mmu00380 Tryptophan metabolism -mmu00400 Phenylalanine, tyrosine and tryptophan biosynthesis -mmu00410 beta-Alanine metabolism -mmu00430 Taurine and hypotaurine metabolism -mmu00440 Phosphonate and phosphinate metabolism -mmu00450 Selenocompound metabolism -mmu00471 D-Glutamine and D-glutamate metabolism -mmu00472 D-Arginine and D-ornithine metabolism -mmu00480 Glutathione metabolism -mmu00500 Starch and sucrose metabolism -mmu00510 N-Glycan biosynthesis -mmu00511 Other glycan degradation -mmu00512 Mucin type O-glycan biosynthesis -mmu00514 Other types of O-glycan biosynthesis -mmu00515 Mannose type O-glycan biosynthesis -mmu00520 Amino sugar and nucleotide sugar 
metabolism -mmu00524 Neomycin, kanamycin and gentamicin biosynthesis -mmu00531 Glycosaminoglycan degradation -mmu00532 Glycosaminoglycan biosynthesis -mmu00533 Glycosaminoglycan biosynthesis -mmu00534 Glycosaminoglycan biosynthesis -mmu00561 Glycerolipid metabolism -mmu00562 Inositol phosphate metabolism -mmu00563 Glycosylphosphatidylinositol (GPI)-anchor biosynthesis -mmu00564 Glycerophospholipid metabolism -mmu00565 Ether lipid metabolism -mmu00590 Arachidonic acid metabolism -mmu00591 Linoleic acid metabolism -mmu00592 alpha-Linolenic acid metabolism -mmu00600 Sphingolipid metabolism -mmu00601 Glycosphingolipid biosynthesis -mmu00603 Glycosphingolipid biosynthesis -mmu00604 Glycosphingolipid biosynthesis -mmu00620 Pyruvate metabolism -mmu00630 Glyoxylate and dicarboxylate metabolism -mmu00640 Propanoate metabolism -mmu00650 Butanoate metabolism -mmu00670 One carbon pool by folate -mmu00730 Thiamine metabolism -mmu00740 Riboflavin metabolism -mmu00750 Vitamin B6 metabolism -mmu00760 Nicotinate and nicotinamide metabolism -mmu00770 Pantothenate and CoA biosynthesis -mmu00780 Biotin metabolism -mmu00785 Lipoic acid metabolism -mmu00790 Folate biosynthesis -mmu00830 Retinol metabolism -mmu00860 Porphyrin and chlorophyll metabolism -mmu00900 Terpenoid backbone biosynthesis -mmu00910 Nitrogen metabolism -mmu00920 Sulfur metabolism -mmu00970 Aminoacyl-tRNA biosynthesis -mmu00980 Metabolism of xenobiotics by cytochrome P450 -mmu00982 Drug metabolism -mmu00983 Drug metabolism -mmu01040 Biosynthesis of unsaturated fatty acids -mmu01100 Metabolic pathways -mmu01200 Carbon metabolism -mmu01210 2-Oxocarboxylic acid metabolism -mmu01212 Fatty acid metabolism -mmu01230 Biosynthesis of amino acids -mmu01521 EGFR tyrosine kinase inhibitor resistance -mmu01522 Endocrine resistance -mmu01523 Antifolate resistance -mmu01524 Platinum drug resistance -mmu02010 ABC transporters -mmu03008 Ribosome biogenesis in eukaryotes -mmu03010 Ribosome -mmu03013 RNA transport -mmu03015 mRNA 
surveillance pathway -mmu03018 RNA degradation -mmu03020 RNA polymerase -mmu03022 Basal transcription factors -mmu03030 DNA replication -mmu03040 Spliceosome -mmu03050 Proteasome -mmu03060 Protein export -mmu03320 PPAR signaling pathway -mmu03410 Base excision repair -mmu03420 Nucleotide excision repair -mmu03430 Mismatch repair -mmu03440 Homologous recombination -mmu03450 Non-homologous end-joining -mmu03460 Fanconi anemia pathway -mmu04010 MAPK signaling pathway -mmu04012 ErbB signaling pathway -mmu04014 Ras signaling pathway -mmu04015 Rap1 signaling pathway -mmu04020 Calcium signaling pathway -mmu04022 cGMP-PKG signaling pathway -mmu04024 cAMP signaling pathway -mmu04060 Cytokine-cytokine receptor interaction -mmu04062 Chemokine signaling pathway -mmu04064 NF-kappa B signaling pathway -mmu04066 HIF-1 signaling pathway -mmu04068 FoxO signaling pathway -mmu04070 Phosphatidylinositol signaling system -mmu04071 Sphingolipid signaling pathway -mmu04072 Phospholipase D signaling pathway -mmu04080 Neuroactive ligand-receptor interaction -mmu04110 Cell cycle -mmu04114 Oocyte meiosis -mmu04115 p53 signaling pathway -mmu04120 Ubiquitin mediated proteolysis -mmu04122 Sulfur relay system -mmu04130 SNARE interactions in vesicular transport -mmu04136 Autophagy -mmu04137 Mitophagy -mmu04140 Autophagy -mmu04141 Protein processing in endoplasmic reticulum -mmu04142 Lysosome -mmu04144 Endocytosis -mmu04145 Phagosome -mmu04146 Peroxisome -mmu04150 mTOR signaling pathway -mmu04151 PI3K-Akt signaling pathway -mmu04152 AMPK signaling pathway -mmu04210 Apoptosis -mmu04211 Longevity regulating pathway -mmu04213 Longevity regulating pathway -mmu04215 Apoptosis -mmu04216 Ferroptosis -mmu04217 Necroptosis -mmu04218 Cellular senescence -mmu04260 Cardiac muscle contraction -mmu04261 Adrenergic signaling in cardiomyocytes -mmu04270 Vascular smooth muscle contraction -mmu04310 Wnt signaling pathway -mmu04330 Notch signaling pathway -mmu04340 Hedgehog signaling pathway -mmu04350 TGF-beta 
signaling pathway -mmu04360 Axon guidance -mmu04370 VEGF signaling pathway -mmu04371 Apelin signaling pathway -mmu04380 Osteoclast differentiation -mmu04390 Hippo signaling pathway -mmu04392 Hippo signaling pathway -mmu04510 Focal adhesion -mmu04512 ECM-receptor interaction -mmu04514 Cell adhesion molecules (CAMs) -mmu04520 Adherens junction -mmu04530 Tight junction -mmu04540 Gap junction -mmu04550 Signaling pathways regulating pluripotency of stem cells -mmu04610 Complement and coagulation cascades -mmu04611 Platelet activation -mmu04612 Antigen processing and presentation -mmu04614 Renin-angiotensin system -mmu04620 Toll-like receptor signaling pathway -mmu04621 NOD-like receptor signaling pathway -mmu04622 RIG-I-like receptor signaling pathway -mmu04623 Cytosolic DNA-sensing pathway -mmu04625 C-type lectin receptor signaling pathway -mmu04630 Jak-STAT signaling pathway -mmu04640 Hematopoietic cell lineage -mmu04650 Natural killer cell mediated cytotoxicity -mmu04657 IL-17 signaling pathway -mmu04658 Th1 and Th2 cell differentiation -mmu04659 Th17 cell differentiation -mmu04660 T cell receptor signaling pathway -mmu04662 B cell receptor signaling pathway -mmu04664 Fc epsilon RI signaling pathway -mmu04666 Fc gamma R-mediated phagocytosis -mmu04668 TNF signaling pathway -mmu04670 Leukocyte transendothelial migration -mmu04672 Intestinal immune network for IgA production -mmu04710 Circadian rhythm -mmu04713 Circadian entrainment -mmu04714 Thermogenesis -mmu04720 Long-term potentiation -mmu04721 Synaptic vesicle cycle -mmu04722 Neurotrophin signaling pathway -mmu04723 Retrograde endocannabinoid signaling -mmu04724 Glutamatergic synapse -mmu04725 Cholinergic synapse -mmu04726 Serotonergic synapse -mmu04727 GABAergic synapse -mmu04728 Dopaminergic synapse -mmu04730 Long-term depression -mmu04740 Olfactory transduction -mmu04742 Taste transduction -mmu04744 Phototransduction -mmu04750 Inflammatory mediator regulation of TRP channels -mmu04810 Regulation of actin 
cytoskeleton -mmu04910 Insulin signaling pathway -mmu04911 Insulin secretion -mmu04912 GnRH signaling pathway -mmu04913 Ovarian steroidogenesis -mmu04914 Progesterone-mediated oocyte maturation -mmu04915 Estrogen signaling pathway -mmu04916 Melanogenesis -mmu04917 Prolactin signaling pathway -mmu04918 Thyroid hormone synthesis -mmu04919 Thyroid hormone signaling pathway -mmu04920 Adipocytokine signaling pathway -mmu04921 Oxytocin signaling pathway -mmu04922 Glucagon signaling pathway -mmu04923 Regulation of lipolysis in adipocytes -mmu04924 Renin secretion -mmu04925 Aldosterone synthesis and secretion -mmu04926 Relaxin signaling pathway -mmu04927 Cortisol synthesis and secretion -mmu04928 Parathyroid hormone synthesis, secretion and action -mmu04930 Type II diabetes mellitus -mmu04931 Insulin resistance -mmu04932 Non-alcoholic fatty liver disease (NAFLD) -mmu04933 AGE-RAGE signaling pathway in diabetic complications -mmu04934 Cushing's syndrome -mmu04940 Type I diabetes mellitus -mmu04950 Maturity onset diabetes of the young -mmu04960 Aldosterone-regulated sodium reabsorption -mmu04961 Endocrine and other factor-regulated calcium reabsorption -mmu04962 Vasopressin-regulated water reabsorption -mmu04964 Proximal tubule bicarbonate reclamation -mmu04966 Collecting duct acid secretion -mmu04970 Salivary secretion -mmu04971 Gastric acid secretion -mmu04972 Pancreatic secretion -mmu04973 Carbohydrate digestion and absorption -mmu04974 Protein digestion and absorption -mmu04975 Fat digestion and absorption -mmu04976 Bile secretion -mmu04977 Vitamin digestion and absorption -mmu04978 Mineral absorption -mmu04979 Cholesterol metabolism -mmu05010 Alzheimer's disease -mmu05012 Parkinson's disease -mmu05014 Amyotrophic lateral sclerosis (ALS) -mmu05016 Huntington's disease -mmu05020 Prion diseases -mmu05030 Cocaine addiction -mmu05031 Amphetamine addiction -mmu05032 Morphine addiction -mmu05033 Nicotine addiction -mmu05034 Alcoholism -mmu05100 Bacterial invasion of epithelial 
cells -mmu05132 Salmonella infection -mmu05133 Pertussis -mmu05134 Legionellosis -mmu05140 Leishmaniasis -mmu05142 Chagas disease (American trypanosomiasis) -mmu05143 African trypanosomiasis -mmu05144 Malaria -mmu05145 Toxoplasmosis -mmu05146 Amoebiasis -mmu05150 Staphylococcus aureus infection -mmu05152 Tuberculosis -mmu05160 Hepatitis C -mmu05161 Hepatitis B -mmu05162 Measles -mmu05163 Human cytomegalovirus infection -mmu05164 Influenza A -mmu05165 Human papillomavirus infection -mmu05166 HTLV-I infection -mmu05167 Kaposi's sarcoma-associated herpesvirus infection -mmu05168 Herpes simplex infection -mmu05169 Epstein-Barr virus infection -mmu05200 Pathways in cancer -mmu05202 Transcriptional misregulation in cancer -mmu05203 Viral carcinogenesis -mmu05204 Chemical carcinogenesis -mmu05205 Proteoglycans in cancer -mmu05206 MicroRNAs in cancer -mmu05210 Colorectal cancer -mmu05211 Renal cell carcinoma -mmu05212 Pancreatic cancer -mmu05213 Endometrial cancer -mmu05214 Glioma -mmu05215 Prostate cancer -mmu05216 Thyroid cancer -mmu05217 Basal cell carcinoma -mmu05218 Melanoma -mmu05219 Bladder cancer -mmu05220 Chronic myeloid leukemia -mmu05221 Acute myeloid leukemia -mmu05222 Small cell lung cancer -mmu05223 Non-small cell lung cancer -mmu05224 Breast cancer -mmu05225 Hepatocellular carcinoma -mmu05226 Gastric cancer -mmu05230 Central carbon metabolism in cancer -mmu05231 Choline metabolism in cancer -mmu05310 Asthma -mmu05320 Autoimmune thyroid disease -mmu05321 Inflammatory bowel disease (IBD) -mmu05322 Systemic lupus erythematosus -mmu05323 Rheumatoid arthritis -mmu05330 Allograft rejection -mmu05332 Graft-versus-host disease -mmu05340 Primary immunodeficiency -mmu05410 Hypertrophic cardiomyopathy (HCM) -mmu05412 Arrhythmogenic right ventricular cardiomyopathy (ARVC) -mmu05414 Dilated cardiomyopathy (DCM) -mmu05416 Viral myocarditis -mmu05418 Fluid shear stress and atherosclerosis
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/rno_pathways.loc Fri Nov 09 05:11:46 2018 -0500 @@ -0,0 +1,326 @@ +rno00010 Glycolysis / Gluconeogenesis +rno00020 Citrate cycle (TCA cycle) +rno00030 Pentose phosphate pathway +rno00040 Pentose and glucuronate interconversions +rno00051 Fructose and mannose metabolism +rno00052 Galactose metabolism +rno00053 Ascorbate and aldarate metabolism +rno00061 Fatty acid biosynthesis +rno00062 Fatty acid elongation +rno00071 Fatty acid degradation +rno00072 Synthesis and degradation of ketone bodies +rno00100 Steroid biosynthesis +rno00120 Primary bile acid biosynthesis +rno00130 Ubiquinone and other terpenoid-quinone biosynthesis +rno00140 Steroid hormone biosynthesis +rno00190 Oxidative phosphorylation +rno00220 Arginine biosynthesis +rno00230 Purine metabolism +rno00232 Caffeine metabolism +rno00240 Pyrimidine metabolism +rno00250 Alanine, aspartate and glutamate metabolism +rno00260 Glycine, serine and threonine metabolism +rno00270 Cysteine and methionine metabolism +rno00280 Valine, leucine and isoleucine degradation +rno00290 Valine, leucine and isoleucine biosynthesis +rno00310 Lysine degradation +rno00330 Arginine and proline metabolism +rno00340 Histidine metabolism +rno00350 Tyrosine metabolism +rno00360 Phenylalanine metabolism +rno00380 Tryptophan metabolism +rno00400 Phenylalanine, tyrosine and tryptophan biosynthesis +rno00410 beta-Alanine metabolism +rno00430 Taurine and hypotaurine metabolism +rno00440 Phosphonate and phosphinate metabolism +rno00450 Selenocompound metabolism +rno00471 D-Glutamine and D-glutamate metabolism +rno00472 D-Arginine and D-ornithine metabolism +rno00480 Glutathione metabolism +rno00500 Starch and sucrose metabolism +rno00510 N-Glycan biosynthesis +rno00511 Other glycan degradation +rno00512 Mucin type O-glycan biosynthesis +rno00514 Other types of O-glycan biosynthesis +rno00515 Mannose type O-glycan biosynthesis +rno00520 Amino sugar and nucleotide sugar metabolism 
+rno00524 Neomycin, kanamycin and gentamicin biosynthesis +rno00531 Glycosaminoglycan degradation +rno00532 Glycosaminoglycan biosynthesis +rno00533 Glycosaminoglycan biosynthesis +rno00534 Glycosaminoglycan biosynthesis +rno00561 Glycerolipid metabolism +rno00562 Inositol phosphate metabolism +rno00563 Glycosylphosphatidylinositol (GPI)-anchor biosynthesis +rno00564 Glycerophospholipid metabolism +rno00565 Ether lipid metabolism +rno00590 Arachidonic acid metabolism +rno00591 Linoleic acid metabolism +rno00592 alpha-Linolenic acid metabolism +rno00600 Sphingolipid metabolism +rno00601 Glycosphingolipid biosynthesis +rno00603 Glycosphingolipid biosynthesis +rno00604 Glycosphingolipid biosynthesis +rno00620 Pyruvate metabolism +rno00630 Glyoxylate and dicarboxylate metabolism +rno00640 Propanoate metabolism +rno00650 Butanoate metabolism +rno00670 One carbon pool by folate +rno00730 Thiamine metabolism +rno00740 Riboflavin metabolism +rno00750 Vitamin B6 metabolism +rno00760 Nicotinate and nicotinamide metabolism +rno00770 Pantothenate and CoA biosynthesis +rno00780 Biotin metabolism +rno00785 Lipoic acid metabolism +rno00790 Folate biosynthesis +rno00830 Retinol metabolism +rno00860 Porphyrin and chlorophyll metabolism +rno00900 Terpenoid backbone biosynthesis +rno00910 Nitrogen metabolism +rno00920 Sulfur metabolism +rno00970 Aminoacyl-tRNA biosynthesis +rno00980 Metabolism of xenobiotics by cytochrome P450 +rno00982 Drug metabolism +rno00983 Drug metabolism +rno01040 Biosynthesis of unsaturated fatty acids +rno01100 Metabolic pathways +rno01200 Carbon metabolism +rno01210 2-Oxocarboxylic acid metabolism +rno01212 Fatty acid metabolism +rno01230 Biosynthesis of amino acids +rno01521 EGFR tyrosine kinase inhibitor resistance +rno01522 Endocrine resistance +rno01523 Antifolate resistance +rno01524 Platinum drug resistance +rno02010 ABC transporters +rno03008 Ribosome biogenesis in eukaryotes +rno03010 Ribosome +rno03013 RNA transport +rno03015 mRNA surveillance 
pathway +rno03018 RNA degradation +rno03020 RNA polymerase +rno03022 Basal transcription factors +rno03030 DNA replication +rno03040 Spliceosome +rno03050 Proteasome +rno03060 Protein export +rno03320 PPAR signaling pathway +rno03410 Base excision repair +rno03420 Nucleotide excision repair +rno03430 Mismatch repair +rno03440 Homologous recombination +rno03450 Non-homologous end-joining +rno03460 Fanconi anemia pathway +rno04010 MAPK signaling pathway +rno04012 ErbB signaling pathway +rno04014 Ras signaling pathway +rno04015 Rap1 signaling pathway +rno04020 Calcium signaling pathway +rno04022 cGMP-PKG signaling pathway +rno04024 cAMP signaling pathway +rno04060 Cytokine-cytokine receptor interaction +rno04062 Chemokine signaling pathway +rno04064 NF-kappa B signaling pathway +rno04066 HIF-1 signaling pathway +rno04068 FoxO signaling pathway +rno04070 Phosphatidylinositol signaling system +rno04071 Sphingolipid signaling pathway +rno04072 Phospholipase D signaling pathway +rno04080 Neuroactive ligand-receptor interaction +rno04110 Cell cycle +rno04114 Oocyte meiosis +rno04115 p53 signaling pathway +rno04120 Ubiquitin mediated proteolysis +rno04122 Sulfur relay system +rno04130 SNARE interactions in vesicular transport +rno04136 Autophagy +rno04137 Mitophagy +rno04140 Autophagy +rno04141 Protein processing in endoplasmic reticulum +rno04142 Lysosome +rno04144 Endocytosis +rno04145 Phagosome +rno04146 Peroxisome +rno04150 mTOR signaling pathway +rno04151 PI3K-Akt signaling pathway +rno04152 AMPK signaling pathway +rno04210 Apoptosis +rno04211 Longevity regulating pathway +rno04213 Longevity regulating pathway +rno04215 Apoptosis +rno04216 Ferroptosis +rno04217 Necroptosis +rno04218 Cellular senescence +rno04260 Cardiac muscle contraction +rno04261 Adrenergic signaling in cardiomyocytes +rno04270 Vascular smooth muscle contraction +rno04310 Wnt signaling pathway +rno04330 Notch signaling pathway +rno04340 Hedgehog signaling pathway +rno04350 TGF-beta signaling pathway 
+rno04360 Axon guidance +rno04370 VEGF signaling pathway +rno04371 Apelin signaling pathway +rno04380 Osteoclast differentiation +rno04390 Hippo signaling pathway +rno04392 Hippo signaling pathway +rno04510 Focal adhesion +rno04512 ECM-receptor interaction +rno04514 Cell adhesion molecules (CAMs) +rno04520 Adherens junction +rno04530 Tight junction +rno04540 Gap junction +rno04550 Signaling pathways regulating pluripotency of stem cells +rno04610 Complement and coagulation cascades +rno04611 Platelet activation +rno04612 Antigen processing and presentation +rno04614 Renin-angiotensin system +rno04620 Toll-like receptor signaling pathway +rno04621 NOD-like receptor signaling pathway +rno04622 RIG-I-like receptor signaling pathway +rno04623 Cytosolic DNA-sensing pathway +rno04625 C-type lectin receptor signaling pathway +rno04630 JAK-STAT signaling pathway +rno04640 Hematopoietic cell lineage +rno04650 Natural killer cell mediated cytotoxicity +rno04657 IL-17 signaling pathway +rno04658 Th1 and Th2 cell differentiation +rno04659 Th17 cell differentiation +rno04660 T cell receptor signaling pathway +rno04662 B cell receptor signaling pathway +rno04664 Fc epsilon RI signaling pathway +rno04666 Fc gamma R-mediated phagocytosis +rno04668 TNF signaling pathway +rno04670 Leukocyte transendothelial migration +rno04672 Intestinal immune network for IgA production +rno04710 Circadian rhythm +rno04713 Circadian entrainment +rno04714 Thermogenesis +rno04720 Long-term potentiation +rno04721 Synaptic vesicle cycle +rno04722 Neurotrophin signaling pathway +rno04723 Retrograde endocannabinoid signaling +rno04724 Glutamatergic synapse +rno04725 Cholinergic synapse +rno04726 Serotonergic synapse +rno04727 GABAergic synapse +rno04728 Dopaminergic synapse +rno04730 Long-term depression +rno04740 Olfactory transduction +rno04742 Taste transduction +rno04744 Phototransduction +rno04750 Inflammatory mediator regulation of TRP channels +rno04810 Regulation of actin cytoskeleton +rno04910 
Insulin signaling pathway +rno04911 Insulin secretion +rno04912 GnRH signaling pathway +rno04913 Ovarian steroidogenesis +rno04914 Progesterone-mediated oocyte maturation +rno04915 Estrogen signaling pathway +rno04916 Melanogenesis +rno04917 Prolactin signaling pathway +rno04918 Thyroid hormone synthesis +rno04919 Thyroid hormone signaling pathway +rno04920 Adipocytokine signaling pathway +rno04921 Oxytocin signaling pathway +rno04922 Glucagon signaling pathway +rno04923 Regulation of lipolysis in adipocytes +rno04924 Renin secretion +rno04925 Aldosterone synthesis and secretion +rno04926 Relaxin signaling pathway +rno04927 Cortisol synthesis and secretion +rno04928 Parathyroid hormone synthesis, secretion and action +rno04930 Type II diabetes mellitus +rno04931 Insulin resistance +rno04932 Non-alcoholic fatty liver disease (NAFLD) +rno04933 AGE-RAGE signaling pathway in diabetic complications +rno04934 Cushing syndrome +rno04940 Type I diabetes mellitus +rno04950 Maturity onset diabetes of the young +rno04960 Aldosterone-regulated sodium reabsorption +rno04961 Endocrine and other factor-regulated calcium reabsorption +rno04962 Vasopressin-regulated water reabsorption +rno04964 Proximal tubule bicarbonate reclamation +rno04966 Collecting duct acid secretion +rno04970 Salivary secretion +rno04971 Gastric acid secretion +rno04972 Pancreatic secretion +rno04973 Carbohydrate digestion and absorption +rno04974 Protein digestion and absorption +rno04975 Fat digestion and absorption +rno04976 Bile secretion +rno04977 Vitamin digestion and absorption +rno04978 Mineral absorption +rno04979 Cholesterol metabolism +rno05010 Alzheimer disease +rno05012 Parkinson disease +rno05014 Amyotrophic lateral sclerosis (ALS) +rno05016 Huntington disease +rno05020 Prion diseases +rno05030 Cocaine addiction +rno05031 Amphetamine addiction +rno05032 Morphine addiction +rno05033 Nicotine addiction +rno05034 Alcoholism +rno05100 Bacterial invasion of epithelial cells +rno05132 Salmonella 
infection +rno05133 Pertussis +rno05134 Legionellosis +rno05140 Leishmaniasis +rno05142 Chagas disease (American trypanosomiasis) +rno05143 African trypanosomiasis +rno05144 Malaria +rno05145 Toxoplasmosis +rno05146 Amoebiasis +rno05150 Staphylococcus aureus infection +rno05152 Tuberculosis +rno05160 Hepatitis C +rno05161 Hepatitis B +rno05162 Measles +rno05163 Human cytomegalovirus infection +rno05164 Influenza A +rno05165 Human papillomavirus infection +rno05166 Human T-cell leukemia virus 1 infection +rno05167 Kaposi sarcoma-associated herpesvirus infection +rno05168 Herpes simplex infection +rno05169 Epstein-Barr virus infection +rno05170 Human immunodeficiency virus 1 infection +rno05200 Pathways in cancer +rno05202 Transcriptional misregulation in cancer +rno05203 Viral carcinogenesis +rno05204 Chemical carcinogenesis +rno05205 Proteoglycans in cancer +rno05206 MicroRNAs in cancer +rno05210 Colorectal cancer +rno05211 Renal cell carcinoma +rno05212 Pancreatic cancer +rno05213 Endometrial cancer +rno05214 Glioma +rno05215 Prostate cancer +rno05216 Thyroid cancer +rno05217 Basal cell carcinoma +rno05218 Melanoma +rno05219 Bladder cancer +rno05220 Chronic myeloid leukemia +rno05221 Acute myeloid leukemia +rno05222 Small cell lung cancer +rno05223 Non-small cell lung cancer +rno05224 Breast cancer +rno05225 Hepatocellular carcinoma +rno05226 Gastric cancer +rno05230 Central carbon metabolism in cancer +rno05231 Choline metabolism in cancer +rno05310 Asthma +rno05320 Autoimmune thyroid disease +rno05321 Inflammatory bowel disease (IBD) +rno05322 Systemic lupus erythematosus +rno05323 Rheumatoid arthritis +rno05330 Allograft rejection +rno05332 Graft-versus-host disease +rno05340 Primary immunodeficiency +rno05410 Hypertrophic cardiomyopathy (HCM) +rno05412 Arrhythmogenic right ventricular cardiomyopathy (ARVC) +rno05414 Dilated cardiomyopathy (DCM) +rno05416 Viral myocarditis +rno05418 Fluid shear stress and atherosclerosis
--- a/tool_data_table_conf.xml.sample Fri Sep 14 09:52:28 2018 -0400 +++ b/tool_data_table_conf.xml.sample Fri Nov 09 05:11:46 2018 -0500 @@ -1,5 +1,9 @@ <tables> <!-- Location kegg_pathways file for pathview tool --> + <table name="kegg_pathways_list_index" comment_char="#"> + <columns>value,name,path</columns> + <file path="tool-data/kegg_pathways_list_index.loc"/> + </table> <table name="hsa_pathways" comment_char="#"> <columns>value,name</columns> <file path="tool-data/hsa_pathways.loc" /> @@ -8,4 +12,8 @@ <columns>value,name</columns> <file path="tool-data/mmu_pathways.loc" /> </table> + <table name="rno_pathways" comment_char="#"> + <columns>value,name</columns> + <file path="tool-data/rno_pathways.loc" /> + </table> </tables>
