# HG changeset patch
# User proteore
# Date 1534951823 14400
# Node ID 097bb3ed2b4d436f87a2fa856398f66387f575a4
planemo upload commit 2e441b4969ae7cf9aeb227a1d47c43ef7268a5e6-dirty
diff -r 000000000000 -r 097bb3ed2b4d PathView.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/PathView.R Wed Aug 22 11:30:23 2018 -0400
@@ -0,0 +1,234 @@
+#!/usr/bin/Rscript
+#Rscript made for mapping gene IDs on KEGG pathways with the Pathview package
+#input : tab-separated file containing ids (uniprot or geneID) to map, plus parameters
+#output : KEGG pathway : png or pdf file.
+
+suppressMessages(library("pathview"))
+
+# Read a tab-separated table from `path`; `header` tells read.table() whether row 1 holds column names.
+# Stops with the path and the underlying reason on failure (not every failure is a missing file).
+read_file <- function(path,header){
+  tryCatch(
+    read.table(path,header=header, sep="\t",stringsAsFactors = FALSE, quote=""),
+    error = function(e) stop("Unable to read file '", path, "': ", conditionMessage(e), call. = FALSE)
+  )
+}
+
+##### function to clean and concatenate pathway name (allow more flexibility for user input)
+# Each pattern below is deleted from x with gsub(), in the listed order, and the
+# result is lower-cased. Order matters: " - .*" has to run before the patterns
+# that delete single spaces and dashes.
+concat_string <- function(x){
+  removal_patterns <- c(" - .*", " ", "-", "_", ",", "\\'", "\\(.*)", "\\/")
+  cleaned <- x
+  for (pattern in removal_patterns) {
+    cleaned <- gsub(pattern, "", cleaned)
+  }
+  cleaned <- tolower(cleaned)
+  cleaned
+}
+
+
+# Parse the command line into a named list: names are the option names without "--",
+# values are the raw strings. Options must be written --key=value. Prints the usage text
+# and quits when called without arguments or with --help.
+get_args <- function(){
+  ## Collect arguments
+  args <- commandArgs(TRUE)
+
+  ## Default setting when no arguments passed
+  if(length(args) < 1) {
+    args <- c("--help")
+  }
+
+  ## Help section
+  if("--help" %in% args) {
+    cat("Pathview R script
+    Arguments:
+        --help                  Print this text
+        --input                 path of the input file (must contain a column of uniprot and/or geneID accession numbers)
+        --id_list               list of ids to use, ',' separated
+        --pathways_id           Id(s) of pathway(s) to use, if several, comma separated list : hsa00010,hsa05412
+        --id_type               Type of accession number ('uniprotID' or 'geneID')
+        --id_column             Column containing accession number of interest (ex : 'c1')
+        --header                Boolean, TRUE if header FALSE if not
+        --output                Output filename
+        --expression_values1    Column containing expression values (first condition)
+        --expression_values2    Column containing expression values (second condition)
+        --expression_values3    Column containing expression values (third condition)
+        --native_kegg           TRUE : native KEGG graph, FALSE : Graphviz graph
+        --species               KEGG species (hsa, mmu, ...)
+
+    Example:
+        ./PathView.R --input='input.csv' --pathways_id='05412' --id_type='uniprotID' --id_column='c1' --header=TRUE --native_kegg=TRUE --species='hsa' \n\n")
+
+    q(save="no")
+  }
+
+  ## Split each token on its FIRST "=" only, so values that contain "=" stay intact.
+  ## A token without "=" is stored with its own name as value.
+  tokens <- sub("^--", "", args)
+  eq_pos <- as.integer(regexpr("=", tokens, fixed = TRUE))
+  keys <- ifelse(eq_pos > 0, substr(tokens, 1, eq_pos - 1), tokens)
+  values <- ifelse(eq_pos > 0, substr(tokens, eq_pos + 1, nchar(tokens)), tokens)
+  return(structure(as.list(values), names = keys))
+}
+
+# Translate a textual flag into a logical, ignoring case:
+#   "t" / "true"  -> TRUE,  "f" / "false" -> FALSE,  anything else (or NULL) -> NULL.
+# The TRUE spellings are checked first.
+str2bool <- function(x){
+  lowered <- tolower(x)
+  if (any(c("t","true") %in% lowered)) return(TRUE)
+  if (any(c("f","false") %in% lowered)) return(FALSE)
+  NULL
+}
+
+is.letter <- function(x) grepl("[[:alpha:]]", x) # TRUE for each element of x that contains at least one alphabetic character
+
+#### hsa00010 -> 00010
+# Remove the KEGG organism code, i.e. the run of letters at the very start of a pathway id.
+# Ids that do not start with a letter come back unchanged. Vectorised over x.
+remove_kegg_prefix <- function(x){
+  # anchored on leading letters, so prefixes of 2 or 4 letters (e.g. "ko") are stripped correctly too
+  sub("^[[:alpha:]]+", "", x)
+}
+
+# Delete every "X" and every space from `string` (vectorised through gsub).
+# NOTE(review): this also removes genuine capital X characters inside identifiers - confirm intended.
+clean_bad_character <- function(string) {
+  gsub(" ", "", gsub("X", "", string))
+}
+
+args <- get_args()
+
+###setting variables
+# pathways_id / id_list arrive as ","-separated strings; clean_bad_character() first strips "X" and spaces.
+# ids     : pathway ids without their organism prefix (hsa00010 -> 00010)
+# id_list : pasted identifiers as a plain character vector (only set when --id_list is given)
+if (!is.null(args$pathways_id)) {
+  ids <- vapply(unlist(strsplit(clean_bad_character(args$pathways_id),",")), remove_kegg_prefix, character(1), USE.NAMES = FALSE)
+}
+# unlist(): strsplit() returns a list, which data.frame() would turn into an oddly named column
+if (!is.null(args$id_list)) {id_list <- unlist(strsplit(clean_bad_character(args$id_list),","))}
+id_type <- tolower(args$id_type)
+ncol <- as.numeric(gsub("c", "" ,args$id_column)) # "c1" -> 1; note this variable shares its name with base::ncol()
+header <- str2bool(args$header)
+native_kegg <- str2bool(args$native_kegg)
+species <- args$species
+
+
+#read input file or list (rows whose cells are all NA or empty are removed)
+if (!is.null(args$input)){
+  tab <- read_file(args$input,header)
+  tab <- tab[!apply(is.na(tab) | tab == "", 1, all), , drop = FALSE] # drop = FALSE: a one-column file must stay a data.frame
+} else {
+  tab <- data.frame(id_list)
+  ncol <- 1
+}
+
+e1 <- as.numeric(gsub("c", "" ,args$expression_values1)) # column index parsed from e.g. "c3"; numeric(0) when the argument is absent
+if (!is.null(args$expression_values1)) { colnames(tab)[e1] <- "e1" } # rename that column so it can be read back as tab$e1
+e2 <- as.numeric(gsub("c", "" ,args$expression_values2)) # same parsing for the second condition
+if (!is.null(args$expression_values2)) { colnames(tab)[e2] <- "e2" } # read back later as tab$e2
+e3 <- as.numeric(gsub("c", "" ,args$expression_values3)) # same parsing for the third condition
+if (!is.null(args$expression_values3)) { colnames(tab)[e3] <- "e3" } # read back later as tab$e3
+
+
+##### map uniprotID to entrez geneID
+# After this section tab has a "geneID" column, and geneID is the flat vector of usable Entrez ids.
+if (id_type == "uniprotid") {
+  uniprotID <- tab[,ncol]
+  # NOTE(review): org is fixed to "Hs" whatever --species says - confirm before using non-human species
+  mapped2geneID <- id2eg(ids = uniprotID, category = "uniprot", org = "Hs", pkg.name = NULL)
+  geneID <- mapped2geneID[,2] # second column is taken as the Entrez id (presumably NA when unmapped)
+  tab <- cbind(tab,geneID)
+}else if (id_type == "geneid"){
+  colnames(tab)[ncol] <- "geneID"
+}else{
+  # an unknown type used to fall through silently, leaving tab without a geneID column
+  stop("Unsupported --id_type '", args$id_type, "' (expected 'uniprotID' or 'geneID')", call. = FALSE)
+}
+# drop missing ids (NA and the literal "NA"), remove spaces, split ";"-joined ids into separate entries
+geneID <- gsub(" ","",tab$geneID[which(tab$geneID !="NA")])
+geneID <- unlist(strsplit(geneID,"[;]"))
+
+
+#### get hsa pathways list
+#download.file(url = "http://rest.kegg.jp/link/pathway/hsa", destfile = "/home/dchristiany/proteore_project/ProteoRE/tools/pathview/geneID_to_hsa_pathways.csv")
+#geneid_hsa_pathways <- read_file(path = "/home/dchristiany/proteore_project/ProteoRE/tools/pathview/geneID_to_hsa_pathways.csv",FALSE)
+#names(geneid_hsa_pathways) <- c("geneID","pathway")
+
+##### build matrix to map on KEGG pathway (kgml : KEGG xml)
+# Expression columns count only as a leading run (e1 | e1,e2 | e1,e2,e3); any other combination,
+# or none at all, falls back to the plain geneID vector. Rows without a geneID are dropped.
+has_expr <- !c(is.null(args$expression_values1), is.null(args$expression_values2), is.null(args$expression_values3))
+n_expr <- sum(has_expr)
+if (n_expr > 0 && all(has_expr[seq_len(n_expr)])) {
+  keep <- !is.na(tab$geneID)
+  expr_cols <- lapply(c("e1", "e2", "e3")[seq_len(n_expr)], function(col) tab[[col]])
+  mat <- as.data.frame(do.call(cbind, expr_cols)[keep, , drop = FALSE]) # drop = FALSE: stay 2-D for one column/row
+  row.names(mat) <- tab$geneID[keep] # NOTE(review): fails on duplicated geneIDs - confirm ids are unique
+} else {
+  mat <- geneID
+}
+
+
+#### simulation data test
+#exp1 <- sim.mol.data(mol.type = c("gene", "gene.ko", "cpd")[1], id.type = NULL, species="hsa", discrete = FALSE, nmol = 161, nexp = 1, rand.seed=100)
+#exp2 <- sim.mol.data(mol.type = c("gene", "gene.ko", "cpd")[1], id.type = NULL, species="hsa", discrete = FALSE, nmol = 161, nexp = 1, rand.seed=50)
+#exp3 <- sim.mol.data(mol.type = c("gene", "gene.ko", "cpd")[1], id.type = NULL, species="hsa", discrete = FALSE, nmol = 161, nexp = 1, rand.seed=10)
+#tab <- cbind(tab,exp1,exp2,exp3)
+
+#write.table(tab, file='/home/dchristiany/proteore_project/ProteoRE/tools/pathview/Lacombe_sim_expression_data.tsv', quote=FALSE, sep='\t',row.names = FALSE)
+
+#mat <- exp1[1:nrow(tab)]
+#names(mat) <- geneID
+
+
+#####mapping geneID (with or without expression values) on KEGG pathway : one pathview() call per requested pathway id
+for (id in ids) { # ids is only defined when --pathways_id was supplied
+ pathview(gene.data = mat, # either the geneID vector or the expression data.frame keyed by geneID
+ #gene.idtype = "geneID",
+ #cpd.data = uniprotID,
+ #cpd.idtype = "uniprot",
+ pathway.id = id, # pathway id with its organism prefix already removed
+ #pathway.name = "",
+ species = species, # value of --species
+ kegg.dir = ".", # NOTE(review): presumably the directory pathview uses for KEGG files - confirm
+ gene.idtype = "entrez",
+ #gene.annotpkg = NULL,
+ #min.nnodes = 3,
+ kegg.native = native_kegg, # from --native_kegg via str2bool(); NULL when the flag was not t/true/f/false
+ #map.null = TRUE,
+ #expand.node = FALSE,
+ #split.group = FALSE,
+ #map.symbol = TRUE,
+ #map.cpdname = TRUE,
+ #node.sum = "sum",
+ #discrete=list(gene=FALSE,cpd=FALSE),
+ #limit = list(gene = 1, cpd = 1),
+ #bins = list(gene = 10, cpd = 10),
+ #both.dirs = list(gene = T, cpd = T),
+ #trans.fun = list(gene = NULL, cpd = NULL),
+ #low = list(gene = "green", cpd = "blue"),
+ #mid = list(gene = "gray", cpd = "gray"),
+ #high = list(gene = "red", cpd = "yellow"),
+ #na.col = "transparent",
+ #sign.pos="bottomleft",
+ #key.pos="topright",
+ #new.signature=TRUE,
+ #rankdir="LB",
+ #cex=0.3,
+ #text.width=15,
+ #res=300,
+ pdf.size=c(9,9))
+ #is.signal=TRUE)
+}
+
+########using keggview.native
+
+#xml.file=system.file("extdata", "hsa00010.xml", package = "pathview")
+#node.data=node.info("/home/dchristiany/hsa00010.xml")
+#plot.data.gene=node.map(mol.data=test, node.data, node.types="gene")
+#colors =node.color(plot.data = plot.data.gene[,1:9])
\ No newline at end of file
diff -r 000000000000 -r 097bb3ed2b4d Pathview.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Pathview.xml Wed Aug 22 11:30:23 2018 -0400
@@ -0,0 +1,234 @@
+
+
+ bioconductor-pathview
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ native=="false" and input["ids"] == "file"
+
+
+
+ native=="true" and input["ids"] == "file"
+
+
+
+ native=="false" and input["ids"] == "text"
+
+
+
+ native=="true" and input["ids"] == "text"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 10.1093/nar/gkx372
+
+@misc{renameTODO,
+ author = {Weijun Luo},
+ year = {2013},
+ title = {pathview},
+ url = {https://bioconductor.org/packages/release/bioc/html/pathview.html},
+}
+
+
diff -r 000000000000 -r 097bb3ed2b4d README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Wed Aug 22 11:30:23 2018 -0400
@@ -0,0 +1,28 @@
+Wrapper for Pathview tool
+=============================
+
+**Authors**
+
+David Christiany, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+
+Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform
+
+This work has been partially funded through the French National Agency for Research (ANR) IFB project.
+
+Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
+
+=============================
+
+This tool maps a list of UniProt accession numbers or Entrez gene IDs to KEGG pathways with the pathview R package.
+
+Select an input file containing ids in a column, set header and column number or copy/paste your ids.
+
+Select your identifier type and a species of interest (for now only human available).
+
+Select one or several pathways of interest from the dropdown menu or copy/paste KEGG pathway id(s)
+
+Select the graph format : KEGG or graphviz
+
+UniProt accession numbers (after conversion to Entrez gene IDs) or Entrez gene IDs are mapped to each selected pathway.
+
+Output : One file (png or pdf) for each selected pathway.
\ No newline at end of file
diff -r 000000000000 -r 097bb3ed2b4d kegg_pathways.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/kegg_pathways.loc.sample Wed Aug 22 11:30:23 2018 -0400
@@ -0,0 +1,2 @@
+hsa_pathways Human (hsa) hsa tool-data/hsa_pathways.csv
+mmu_pathways Mouse (mmu) mmu tool-data/mmu_pathways.csv
\ No newline at end of file
diff -r 000000000000 -r 097bb3ed2b4d test-data/Lacombe_et_al_2017_OK.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Lacombe_et_al_2017_OK.txt Wed Aug 22 11:30:23 2018 -0400
@@ -0,0 +1,165 @@
+Protein accession number (UniProt) Protein name Number of peptides (razor + unique)
+P15924 Desmoplakin 69
+P02538 Keratin, type II cytoskeletal 6A 53
+P02768 Serum albumin 44
+P08779 Keratin, type I cytoskeletal 16 29
+Q02413 Desmoglein-1 24
+P07355 "Annexin A2;Putative annexin A2-like protein" 22
+P14923 Junction plakoglobin 22
+P02788 Lactotransferrin 21
+Q9HC84 Mucin-5B 21
+P29508 Serpin B3 20
+P63261 Actin, cytoplasmic 2 19
+Q8N1N4 Keratin, type II cytoskeletal 78 18
+Q04695 Keratin, type I cytoskeletal 17 18
+P01876 Ig alpha-1 chain C region 16
+Q01469 Fatty acid-binding protein 5, epidermal 15
+P31944 Caspase-14 15
+P01833 Polymeric immunoglobulin receptor 15
+P06733 Alpha-enolase 15
+P25311 Zinc-alpha-2-glycoprotein 15
+Q15149 Plectin 15
+P19013 Keratin, type II cytoskeletal 4 13
+Q6KB66 Keratin, type II cytoskeletal 80 13
+Q08188 Protein-glutamine gamma-glutamyltransferase E 12
+P13646 Keratin, type I cytoskeletal 13 11
+Q86YZ3 Hornerin 11
+P04259 Keratin, type II cytoskeletal 6B 10
+P02545 "Prelamin-A/C;Lamin-A/C" 10
+P04083 Annexin A1 10
+P11021 78 kDa glucose-regulated protein 10
+P02787 Serotransferrin 9
+P04040 Catalase 9
+P31151 Protein S100-A7 9
+P31947 14-3-3 protein sigma 9
+Q96P63 Serpin B12 9
+P14618 Pyruvate kinase PKM 9
+P60174 Triosephosphate isomerase 9
+Q06830 Peroxiredoxin-1 9
+P01040 Cystatin-A 8
+P05089 Arginase-1 8
+P01834 Ig kappa chain C region 8
+P04406 Glyceraldehyde-3-phosphate dehydrogenase 8
+P0DMV9 Heat shock 70 kDa protein 1B 8
+P13639 Elongation factor 2 8
+P35579 Myosin-9 8
+P68371 Tubulin beta-4B chain 8
+Q8WVV4 Protein POF1B 8
+O75635 Serpin B7 7
+P01857 Ig gamma-1 chain C region 7
+P61626 Lysozyme C 7
+P68363 Tubulin alpha-1B chain 7
+P01009 "Alpha-1-antitrypsin;Short peptide from AAT" 6
+P07900 Heat shock protein HSP 90-alpha 6
+Q9NZH8 Interleukin-36 gamma 6
+O43707 "Alpha-actinin-4;Alpha-actinin-1" 6
+O75223 Gamma-glutamylcyclotransferase 6
+P00338 L-lactate dehydrogenase A chain 6
+P07339 Cathepsin D 6
+P62987 Ubiquitin-60S ribosomal protein L40 6
+P10599 Thioredoxin 6
+Q9UGM3 Deleted in malignant brain tumors 1 protein 6
+Q9UI42 Carboxypeptidase A4 6
+P47929 Galectin-7 5
+Q13867 Bleomycin hydrolase 5
+Q6P4A8 Phospholipase B-like 1 5
+O75369 Filamin-B 5
+P00441 Superoxide dismutase [Cu-Zn] 5
+P04792 Heat shock protein beta-1 5
+P11142 Heat shock cognate 71 kDa protein 5
+P58107 Epiplakin 5
+P60842 Eukaryotic initiation factor 4A-I 5
+P62937 Peptidyl-prolyl cis-trans isomerase A 5
+P63104 14-3-3 protein zeta/delta 5
+Q92820 Gamma-glutamyl hydrolase 5
+O75342 Arachidonate 12-lipoxygenase, 12R-type 4
+P09211 Glutathione S-transferase P 4
+P31025 Lipocalin-1 4
+P48594 Serpin B4 4
+Q14574 Desmocollin-3 4
+Q5T750 Skin-specific protein 32 4
+Q6UWP8 Suprabasin 4
+O60911 Cathepsin L2 4
+P00558 Phosphoglycerate kinase 1 4
+P04075 Fructose-bisphosphate aldolase A 4
+P07384 Calpain-1 catalytic subunit 4
+P0CG05 Ig lambda-2 chain C regions 4
+P18206 Vinculin 4
+P62258 14-3-3 protein epsilon 4
+P68871 Hemoglobin subunit beta 4
+Q9C075 Keratin, type I cytoskeletal 23 4
+A8K2U0 Alpha-2-macroglobulin-like protein 1 3
+P00738 Haptoglobin 3
+P01011 Alpha-1-antichymotrypsin 3
+P02763 Alpha-1-acid glycoprotein 1 3
+P18510 Interleukin-1 receptor antagonist protein 3
+P22528 Cornifin-B 3
+P30740 Leukocyte elastase inhibitor 3
+P80188 Neutrophil gelatinase-associated lipocalin 3
+Q15828 Cystatin-M 3
+Q9HCY8 Protein S100-A14 3
+P01623 Ig kappa chain V-III region 3
+P01877 Ig alpha-2 chain C region 3
+P06396 Gelsolin 3
+P14735 Insulin-degrading enzyme 3
+P20933 N(4)-(beta-N-acetylglucosaminyl)-L-asparaginase 3
+P25788 Proteasome subunit alpha type-3 3
+P26641 Elongation factor 1-gamma 3
+P36952 Serpin B5 3
+P40926 Malate dehydrogenase, mitochondrial 3
+Q9Y6R7 IgGFc-binding protein 3
+O95274 Ly6/PLAUR domain-containing protein 3 2
+P00491 Purine nucleoside phosphorylase 2
+P04080 Cystatin-B 2
+P09972 Fructose-bisphosphate aldolase C 2
+P19012 Keratin, type I cytoskeletal 15 2
+P20930 Filaggrin 2
+Q96FX8 p53 apoptosis effector related to PMP-22 2
+Q9UIV8 Serpin B13 2
+P01625 Ig kappa chain V-IV region Len 2
+P01765 Ig heavy chain V-III region TIL 2
+P01766 Ig heavy chain V-III region BRO 2
+P01860 Ig gamma-3 chain C region 2
+P01871 Ig mu chain C region 2
+P05090 Apolipoprotein D 2
+P06870 Kallikrein-1 2
+P07858 Cathepsin B 2
+P08865 40S ribosomal protein SA 2
+P11279 Lysosome-associated membrane glycoprotein 1 2
+P13473 Lysosome-associated membrane glycoprotein 2 2
+P19971 Thymidine phosphorylase 2
+P23284 Peptidyl-prolyl cis-trans isomerase B 2
+P23396 40S ribosomal protein S3 2
+P25705 ATP synthase subunit alpha, mitochondrial 2
+P27482 Calmodulin-like protein 3 2
+P31949 Protein S100-A11 2
+P40121 Macrophage-capping protein 2
+P42357 Histidine ammonia-lyase 2
+P47756 F-actin-capping protein subunit beta 2
+P48637 Glutathione synthetase 2
+P49720 Proteasome subunit beta type-3 2
+P50395 Rab GDP dissociation inhibitor beta 2
+P59998 Actin-related protein 2/3 complex subunit 4 2
+P61160 Actin-related protein 2 2
+P61916 Epididymal secretory protein E1 2
+P04745 Alpha-amylase 1 23
+Q9NZT1 Calmodulin-like protein 5 8
+P12273 Prolactin-inducible protein 6
+Q96DA0 Zymogen granule protein 16 homolog B 5
+P01036 Cystatin-S 5
+Q8TAX7 Mucin-7 2
+P01037 Cystatin-SN 2
+P09228 Cystatin-SA 2
+P04264 Keratin, type II cytoskeletal 1 61
+P35908 Keratin, type II cytoskeletal 2 epidermal 40
+P13645 Keratin, type I cytoskeletal 10 40
+Q5D862 Filaggrin-2 14
+Q5T749 Keratinocyte proline-rich protein 13
+Q8IW75 Serpin A12 3
+P81605 Dermcidin 3
+P22531 Small proline-rich protein 2E 3
+P59666 Neutrophil defensin 3 2
+P78386 Keratin, type II cuticular Hb5 2
+
+
+
diff -r 000000000000 -r 097bb3ed2b4d test-data/hsa00010.pathview.png
Binary file test-data/hsa00010.pathview.png has changed
diff -r 000000000000 -r 097bb3ed2b4d test-data/hsa04514.pathview.png
Binary file test-data/hsa04514.pathview.png has changed
diff -r 000000000000 -r 097bb3ed2b4d test-data/hsa05167.pathview.png
Binary file test-data/hsa05167.pathview.png has changed
diff -r 000000000000 -r 097bb3ed2b4d tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Aug 22 11:30:23 2018 -0400
@@ -0,0 +1,7 @@
+
+
+
+ dbkey,name,value,path
+
+
+
\ No newline at end of file