changeset 3:3bedd074c485 draft

planemo upload commit c6e1fd1f68e81ce9eea6ad66adee21070f2893ef-dirty
author proteore
date Wed, 06 Mar 2019 09:19:05 -0500
parents db2cd451835f
children 5cd79aa7aac9
files README.rst get_expression_profiles.R get_expression_profiles.xml proteore_protein_atlas_normal_tissue.loc.sample proteore_protein_atlas_tumor_tissue.loc.sample sel_ann_hpa.R sel_ann_hpa.xml tool-data/proteinatlas.loc.sample tool_data_table_conf.xml.sample
diffstat 9 files changed, 413 insertions(+), 409 deletions(-) [+]
line wrap: on
line diff
--- a/README.rst	Thu Dec 06 10:07:13 2018 -0500
+++ b/README.rst	Wed Mar 06 09:19:05 2019 -0500
@@ -1,9 +1,9 @@
-Wrapper for Get expression data by tissue Tool
+Wrapper for Get expression profiles by tissue Tool
 =================================================
 
 **Authors**
 
-T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+David Christiany, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
 
 Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_expression_profiles.R	Wed Mar 06 09:19:05 2019 -0500
@@ -0,0 +1,146 @@
+# Read file and return file content as data.frame
+read_file <- function(path,header){
+  file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE)
+  if (inherits(file,"try-error")){
+    stop("File not found !")
+  }else{
+    return(file)
+  }
+}
+
+str2bool <- function(x){
+  if (any(is.element(c("t","true"),tolower(x)))){
+    return (TRUE)
+  }else if (any(is.element(c("f","false"),tolower(x)))){
+    return (FALSE)
+  }else{
+    return(NULL)
+  }
+}
+
+# input has to be a list of IDs in ENSG format
+# tissue is one of unique(HPA.normal.tissue$Tissue)
+# level is one, or several, or 0 (=ALL) of "Not detected", "Medium", "High", "Low"
+# reliability is one, or several, or 0 (=ALL) of "Enhanced", "Supported", "Approved", "Uncertain"
+annot.HPAnorm<-function(input, HPA_normal_tissue, tissue, level, reliability, not_mapped_option) {
+  dat <- subset(HPA_normal_tissue, Gene %in% input)
+  res.Tissue<-subset(dat, Tissue %in% tissue) 
+  res.Level<-subset(res.Tissue, Level %in% level) 
+  res.Rel<-subset(res.Level, Reliability %in% reliability) 
+  
+  if (not_mapped_option) {
+    if (length(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)))>0) {
+      not_match_IDs <- matrix(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)), ncol = 1, nrow = length(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene))))
+      not.match <- matrix("no match", ncol = ncol(HPA_normal_tissue) - 1, nrow = length(not_match_IDs))
+      not.match <- cbind(not_match_IDs, unname(not.match))
+      colnames(not.match) <- colnames(HPA_normal_tissue)
+      res <- rbind(res.Rel, not.match)
+    } else {
+      res <- res.Rel
+    } 
+    
+    if (length(setdiff(input, unique(dat$Gene)))>0) {
+      not.mapped <- matrix(ncol = ncol(HPA_normal_tissue) - 1, nrow = length(setdiff(input, unique(dat$Gene))))
+      not.mapped <- cbind(matrix(setdiff(input, unique(dat$Gene)), ncol = 1, nrow = length(setdiff(input, unique(dat$Gene)))), unname(not.mapped))
+      colnames(not.mapped) <- colnames(HPA_normal_tissue)
+      res <- rbind(res, not.mapped)
+    }
+    
+  } else {
+    res <- res.Rel
+  }
+  
+  return(res)
+  
+}
+
+annot.HPAcancer<-function(input, HPA_cancer_tissue, cancer, not_mapped_option) {
+  dat <- subset(HPA_cancer_tissue, Gene %in% input)
+  res.Cancer<-subset(dat, Cancer %in% cancer) 
+
+  if (not_mapped_option) {
+    not.mapped <- matrix(ncol=ncol(HPA_cancer_tissue)-1, nrow=length(setdiff(input, unique(dat$Gene))))
+    not.mapped <- cbind(matrix(setdiff(input, unique(dat$Gene)), ncol = 1, nrow = length(setdiff(input, unique(dat$Gene)))), unname(not.mapped))
+    colnames(not.mapped) <- colnames(HPA_cancer_tissue)
+    res <- rbind(res.Cancer, not.mapped)
+  } else {
+    res <- res.Cancer
+  }
+  return(res)
+}
+
+
+main <- function() {
+  args <- commandArgs(TRUE)
+  if(length(args)<1) {
+    args <- c("--help")
+  }
+  
+  # Help section
+  if("--help" %in% args) {
+    cat("Selection and Annotation HPA
+    Arguments:
+        --ref_file: HPA normal/cancer tissue file path
+        --input_type: type of input (list of id or filename)
+        --input: list of IDs in ENSG format
+        --column_number: the column number which you would like to apply...
+        --header: true/false if your file contains a header
+        --atlas: normal/cancer
+          if normal:
+            --tissue: list of tissues
+            --level: Not detected, Low, Medium, High
+            --reliability: Supportive, Uncertain
+          if cancer:
+            --cancer: Cancer tissues
+        --not_mapped: true/false if your output file should contain not-mapped and not-match IDs 
+        --output: output filename \n")
+    q(save="no")
+  }
+  
+  # Parse arguments
+  parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
+  argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
+  args <- as.list(as.character(argsDF$V2))
+  names(args) <- argsDF$V1
+  
+  #save(args,file = "/home/dchristiany/proteore_project/ProteoRE/tools/Get_expression_profiles/args.rda")
+  #load("/home/dchristiany/proteore_project/ProteoRE/tools/Get_expression_profiles/args.rda")
+  
+  # Extract input
+  input_type = args$input_type
+  if (input_type == "list") {
+    list_id = strsplit(args$input, "[ \t\n]+")[[1]]
+  } else if (input_type == "file") {
+    filename = args$input
+    column_number = as.numeric(gsub("c", "" ,args$column_number))
+    header = str2bool(args$header)
+    file = read_file(filename, header)
+    list_id = sapply(strsplit(file[,column_number], ";"), "[", 1)
+  }
+  input = list_id
+
+  # Read reference file
+  reference_file = read_file(args$ref_file, TRUE)
+
+  # Extract other options
+  atlas = args$atlas
+  not_mapped_option = str2bool(args$not_mapped)
+  if (atlas=="normal") {
+    tissue = strsplit(args$tissue, ",")[[1]]
+    level = strsplit(args$level, ",")[[1]]
+    reliability = strsplit(args$reliability, ",")[[1]]
+    # Calculation
+    res = annot.HPAnorm(input, reference_file, tissue, level, reliability, not_mapped_option)
+  } else if (atlas=="cancer") {
+    cancer = strsplit(args$cancer, ",")[[1]]
+    # Calculation
+    res = annot.HPAcancer(input, reference_file, cancer, not_mapped_option)
+  }
+  
+  # Write output
+  output = args$output
+  res <- apply(res, c(1,2), function(x) gsub("^$|^ $", NA, x))
+  write.table(res, output, sep = "\t", quote = FALSE, row.names = FALSE)
+}
+
+main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_expression_profiles.xml	Wed Mar 06 09:19:05 2019 -0500
@@ -0,0 +1,254 @@
+<tool id="sel_ann_hpa" name="Get expression profiles" version="2019.03.06">
+    <description>by (normal or tumor) tissue/cell type [Human Protein Atlas]
+    </description>
+    <requirements>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+    <command interpreter="Rscript">
+        $__tool_directory__/get_expression_profiles.R
+        
+        #if "protein_atlas" in str($options.proteinatlas).split("/")
+          --ref_file="$options.proteinatlas"
+        #else
+          --ref_file="$__tool_directory__/$options.proteinatlas"
+        #end if
+
+        --input_type="$input.ids"
+        #if $input.ids == "list"
+            --input="$input.list"
+        #else
+            --input="$input.file"
+            --column_number="$input.ncol"
+            --header="$input.header"
+        #end if
+
+        #if $options.database == "normal"
+            --atlas="normal"
+            --tissue="$options.normal_tissue"
+            --level="$options.level"
+            --reliability="$options.reliability"
+        #else if $options.database == "tumor"
+            --atlas="cancer"
+            --cancer="$options.cancer_tissue"
+        #end if
+        --not_mapped="$not_mapped"
+        --output="$hpa_output"
+    </command>
+       <inputs>
+           <conditional name="input" >
+            <param name="ids" type="select" label="Enter your IDs (ENSG IDs only)" help="Copy/paste or from a file (e.g. table)" >
+                <option value="list">Copy/paste your IDs</option>
+                <option value="file" selected="true">Input file containing your IDs</option>
+            </param>
+            <when value="list" >
+                <param name="list" type="text" label="Copy/paste your IDs" help='IDs must be separated by spaces into the form field, for example: ENSG00000174876 ENSG00000178372 ENSG00000159763' >
+                    <sanitizer>
+                        <valid initial="string.printable">
+                            <remove value="&apos;"/>
+                        </valid>
+                        <mapping initial="none">
+                            <add source="&apos;" target="__sq__"/>
+                        </mapping>
+                    </sanitizer>
+                </param>
+            </when>
+            <when value="file" >
+                <param name="file" type="data" format="txt,tabular" label="" help="" />
+                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
+                <param name="ncol" type="text" value="c1" label="Column number of IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on'>
+                    <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator>
+                </param>
+            </when>
+        </conditional>
+        <conditional name="options">
+            <param name="database" type="select" label="Human Protein Atlas (normal or tumor tissue)">
+                <option value="normal">Human Normal Tissue</option>
+                <option value="tumor">Human Tumor Tissue</option>
+            </param>
+            <when value="normal">
+                <param name="proteinatlas" type="select" label="Normal tissue HPA version" >
+                    <options from_data_table="proteore_protein_atlas_normal_tissue"/>
+	            </param>
+                <param name="normal_tissue" type="select" label="Select tissue(s)" multiple="True" display="checkboxes" optional="False">
+                    <option value="adrenal gland" >Adrenal gland</option>
+                    <option value="appendix" >Appendix</option>
+                    <option value="bone marrow" >Bone marrow</option>
+                    <option value="breast" >Breast</option>
+                    <option value="bronchus" >Bronchus</option>
+                    <option value="caudate" >Caudate</option>
+                    <option value="cerebellum" >Cerebellum</option>
+                    <option value="cerebral cortex" >Cerebral cortex</option>
+                    <option value="cervix" >Cervix</option>
+                    <option value="colon" >Colon</option>
+                    <option value="duodenum" >Duodenum</option>
+                    <option value="endometrium 1" >Endometrium 1</option>
+                    <option value="endometrium 2" >Endometrium 2</option>
+                    <option value="epididymis" >Epididymis</option>
+                    <option value="esophagus" >Esophagus</option>
+                    <option value="fallopian tube" >Fallopian tube</option>
+                    <option value="gallbladder" >Gallbladder</option>
+                    <option value="heart muscle" >Heart muscle</option>
+                    <option value="hippocampus" >Hippocampus</option>
+                    <option value="kidney" >Kidney</option>
+                    <option value="liver" >Liver</option>
+                    <option value="lung" >Lung</option>
+                    <option value="lymph node" >Lymph node</option>
+                    <option value="nasopharynx" >Nasopharynx</option>
+                    <option value="oral mucosa" >Oral mucosa</option>
+                    <option value="ovary" >Ovary</option>
+                    <option value="pancreas" >Pancreas</option>
+                    <option value="parathyroid gland" >Parathyroid gland</option>
+                    <option value="placenta" >Placenta</option>
+                    <option value="prostate" >Prostate</option>
+                    <option value="rectum" >Rectum</option>
+                    <option value="salivary gland" >Salivary gland</option>
+                    <option value="seminal vesicle" >Seminal vesicle</option>
+                    <option value="skeletal muscle" >Skeletal muscle</option>
+                    <option value="skin 1" >Skin 1</option>
+                    <option value="skin 2" >Skin 2</option>
+                    <option value="small intestine" >Small intestine</option>
+                    <option value="smooth muscle" >Smooth muscle</option>
+                    <option value="soft tissue 1" >Soft tissue 1</option>
+                    <option value="soft tissue 2" >Soft tissue 2</option>
+                    <option value="spleen" >Spleen</option>
+                    <option value="stomach 1" >Stomach 1</option>
+                    <option value="stomach 2" >Stomach 2</option>
+                    <option value="testis" >Testis</option>
+                    <option value="thyroid gland" >Thyroid gland</option>
+                    <option value="tonsil" >Tonsil</option>
+                    <option value="urinary bladder" >Urinary bladder</option>
+                    <option value="vagina" >Vagina</option>
+                </param>
+                <param name="level" type="select" label="Expression level" display="checkboxes" multiple="True" optional="False">
+                    <option value="High" selected="true">High</option>
+                    <option value="Medium">Medium</option>
+                    <option value="Low">Low</option>
+                    <option value="Not detected">Not detected</option>
+                </param>
+                <param name="reliability" type="select" label="Reliability score" display="checkboxes" multiple="True" optional="False">
+                    <option value="Enhanced" selected="true">Enhanced</option>
+                    <option value="Supported" selected="true">Supported</option>
+                    <option value="Approved">Approved</option>
+                    <option value="Uncertain">Uncertain</option>
+                </param>
+            </when>
+            <when value="tumor">
+                <param name="proteinatlas" type="select" label="Tumor tissue HPA version" >
+                    <options from_data_table="proteore_protein_atlas_tumor_tissue"/>
+	            </param>
+                <param name="cancer_tissue" type="select" label="Select cancer tissue(s)" multiple="True" display="checkboxes" optional="False">
+                    <option value="breast cancer" >Breast cancer</option>
+                    <option value="carcinoid" >Carcinoid</option>
+                    <option value="cervical cancer" >Cervical cancer</option>
+                    <option value="colorectal cancer" >Colorectal cancer</option>
+                    <option value="endometrial cancer" >Endometrial cancer</option>
+                    <option value="glioma" >Glioma</option>
+                    <option value="head and neck cancer" >Head and neck cancer</option>
+                    <option value="liver cancer" >Liver cancer</option>
+                    <option value="lung cancer" >Lung cancer</option>
+                    <option value="lymphoma" >Lymphoma</option>
+                    <option value="melanoma" >Melanoma</option>
+                    <option value="ovarian cancer" >Ovarian cancer</option>
+                    <option value="pancreatic cancer" >Pancreatic cancer</option>
+                    <option value="prostate cancer" >Prostate cancer</option>
+                    <option value="renal cancer" >Renal cancer</option>
+                    <option value="skin cancer" >Skin cancer</option>
+                    <option value="stomach cancer" >Stomach cancer</option>
+                    <option value="testis cancer" >Testis cancer</option>
+                    <option value="thyroid cancer" >Thyroid cancer</option>
+                    <option value="urothelial cancer" >Urothelial cancer</option>
+                </param>
+            </when>
+        </conditional>
+        <param name="not_mapped" type="boolean" truevalue="true" falsevalue="false" label="Keep IDs not found in HPA?" checked="true"/>
+    </inputs>
+    <outputs>
+        <data name="hpa_output" format="tsv" label="" />
+    </outputs>
+    <tests>
+        <test>
+            <conditional name="input">
+                <param name="ids" value="file"/>
+                <param name="file" value="ID_Converter_FKW_Lacombe_et_al_2017_OK.txt"/>
+                <param name="header" value="true"/>
+                <param name="ncol" value="c8"/>
+            </conditional>
+            <conditional name="options">
+                <param name="database" value="normal"/>
+                <param name="proteinatlas" value="normal_tissue.tsv"/>
+                <param name="normal_tissue" value="bronchus,lung,nasopharynx,salivary gland"/>
+                <param name="level" value="Not detected,Medium,High,Low"/>
+                <param name="reliability" value="Approved,Supported,Uncertain"/>
+            </conditional>
+            <param name="not_mapped" value="true" />
+            <output name="hpa_output" file="Expres_levels_Lacombe_et_al_2017_OK.txt"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+**Description**
+
+This tool retrieves expression profiles (normal or tumor tissue) from the Human Protein Atlas (https://www.proteinatlas.org/).
+ 
+**Input**
+
+A list of ENSG (Ensembl gene) IDs must be entered (either via a copy/paste or by choosing a file); if it's not the case, please use the ID_Converter tool of ProteoRE.
+
+-----
+
+**Parameters**
+
+"Human Protein Atlas (normal or tumor tissue)": two resources are currently available 
+
+* **Human normal tissue data**: expression profiles for proteins in human tissues based on immunohistochemistry using tissue microarrays.
+
+**Output** is a tab-separated file that includes the Ensembl gene identifier ("Gene"), tissue name ("Tissue"), annotated cell type ("Cell type"), expression value ("Level"), and the gene reliability of the expression value ("Reliability").
+ 
+* **Human tumor tissue data**: staining profiles for proteins in human tumor tissue based on immunohistochemistry using tissue microarrays, and log-rank P value for Kaplan-Meier analysis of correlation between mRNA expression level and patient survival.
+
+**Output** is a tab-separated file that includes the Ensembl gene identifier ("Gene"), gene name ("Gene name"), tumor name ("Cancer"), the number of patients annotated for different staining levels ("High", "Medium", "Low" & "Not detected") and log-rank p values for patient survival and mRNA correlation ("prognostic - favourable", "unprognostic - favourable", "prognostic - unfavourable", "unprognostic - unfavourable").
+
+"Select tissue(s)": information from more than one tissue can be retrieved 
+
+"Keep IDs not found in HPA?": ENSG IDs not found in the Human Protein Atlas are returned in the output file with "NA" values (default is "Yes")
+
+-----
+
+**Reliability score (only for normal tissue)**
+
+Reliability score is divided into Enhanced, Supported, Approved, or Uncertain with respect to the definitions from HPA:
+
+Enhanced - One or several antibodies with non-overlapping epitopes targeting the same gene have obtained enhanced validation based on orthogonal or independent antibody validation method.
+
+Supported - Consistency with RNA-seq and/or protein/gene characterization data, in combination with similar staining pattern if independent antibodies are available.
+
+Approved - Consistency with RNA-seq data in combination with inconsistency with, or lack of, protein/gene characterization data. Alternatively, consistency with protein/gene characterization data in combination with inconsistency with RNA-seq data. If independent antibodies are available, the staining pattern is partly similar or dissimilar.
+
+Uncertain - Inconsistency with, or lack of, RNA-seq and/or protein/gene characterization data, in combination with dissimilar staining pattern if independent antibodies are available.
+
+-----
+
+**Data sources (release date)**
+
+Both normal and tumor tissue data are based on the Human Protein Atlas version 18 and Ensembl version 88.38.
+
+-----
+
+.. class:: infomark
+
+**Authors**
+
+David Christiany, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+
+Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR
+
+This work has been partially funded through the French National Agency for Research (ANR) IFB project.
+
+Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
+        
+    ]]></help>
+    <citations>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/proteore_protein_atlas_normal_tissue.loc.sample	Wed Mar 06 09:19:05 2019 -0500
@@ -0,0 +1,2 @@
+#<id>	<name>	<tissue>	<value>
+HPA_normal_tissue_19-07-2018	HPA normal tissue 19/07/2018	HPA_normal_tissue	/tool-data/HPA_normal_tissue_23-10-2018.tsv
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/proteore_protein_atlas_tumor_tissue.loc.sample	Wed Mar 06 09:19:05 2019 -0500
@@ -0,0 +1,2 @@
+#<id>	<name>	<tissue>	<value>
+HPA_pathology_19-07-2018	HPA pathology 19/07/2018	HPA_pathology	/tool-data/HPA_pathology_23-10-2018.tsv
--- a/sel_ann_hpa.R	Thu Dec 06 10:07:13 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,146 +0,0 @@
-# Read file and return file content as data.frame
-read_file <- function(path,header){
-  file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE)
-  if (inherits(file,"try-error")){
-    stop("File not found !")
-  }else{
-    return(file)
-  }
-}
-
-str2bool <- function(x){
-  if (any(is.element(c("t","true"),tolower(x)))){
-    return (TRUE)
-  }else if (any(is.element(c("f","false"),tolower(x)))){
-    return (FALSE)
-  }else{
-    return(NULL)
-  }
-}
-
-# input has to be a list of IDs in ENSG format
-# tissue is one of unique(HPA.normal.tissue$Tissue)
-# level is one, or several, or 0 (=ALL) of "Not detected", "Medium", "High", "Low"
-# reliability is one, or several, or 0 (=ALL) of "Approved", "Supported", "Uncertain"
-annot.HPAnorm<-function(input, HPA_normal_tissue, tissue, level, reliability, not_mapped_option) {
-  dat <- subset(HPA_normal_tissue, Gene %in% input)
-  res.Tissue<-subset(dat, Tissue %in% tissue) 
-  res.Level<-subset(res.Tissue, Level %in% level) 
-  res.Rel<-subset(res.Level, Reliability %in% reliability) 
-  
-  if (not_mapped_option) {
-    if (length(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)))>0) {
-      not_match_IDs <- matrix(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)), ncol = 1, nrow = length(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene))))
-      not.match <- matrix("not match", ncol = ncol(HPA_normal_tissue) - 1, nrow = length(not_match_IDs))
-      not.match <- cbind(not_match_IDs, unname(not.match))
-      colnames(not.match) <- colnames(HPA_normal_tissue)
-      res <- rbind(res.Rel, not.match)
-    } else {
-      res <- res.Rel
-    } 
-    
-    if (length(setdiff(input, unique(dat$Gene)))>0) {
-      not.mapped <- matrix(ncol = ncol(HPA_normal_tissue) - 1, nrow = length(setdiff(input, unique(dat$Gene))))
-      not.mapped <- cbind(matrix(setdiff(input, unique(dat$Gene)), ncol = 1, nrow = length(setdiff(input, unique(dat$Gene)))), unname(not.mapped))
-      colnames(not.mapped) <- colnames(HPA_normal_tissue)
-      res <- rbind(res, not.mapped)
-    }
-    
-  } else {
-    res <- res.Rel
-  }
-  
-  return(res)
-  
-}
-
-annot.HPAcancer<-function(input, HPA_cancer_tissue, cancer, not_mapped_option) {
-  dat <- subset(HPA_cancer_tissue, Gene %in% input)
-  res.Cancer<-subset(dat, Cancer %in% cancer) 
-
-  if (not_mapped_option) {
-    not.mapped <- matrix(ncol=ncol(HPA_cancer_tissue)-1, nrow=length(setdiff(input, unique(dat$Gene))))
-    not.mapped <- cbind(matrix(setdiff(input, unique(dat$Gene)), ncol = 1, nrow = length(setdiff(input, unique(dat$Gene)))), unname(not.mapped))
-    colnames(not.mapped) <- colnames(HPA_cancer_tissue)
-    res <- rbind(res.Cancer, not.mapped)
-  } else {
-    res <- res.Cancer
-  }
-  return(res)
-}
-
-
-main <- function() {
-  args <- commandArgs(TRUE)
-  if(length(args)<1) {
-    args <- c("--help")
-  }
-  
-  # Help section
-  if("--help" %in% args) {
-    cat("Selection and Annotation HPA
-    Arguments:
-        --ref_file: HPA normal/cancer tissue file path
-        --input_type: type of input (list of id or filename)
-        --input: list of IDs in ENSG format
-        --column_number: the column number which you would like to apply...
-        --header: true/false if your file contains a header
-        --atlas: normal/cancer
-          if normal:
-            --tissue: list of tissues
-            --level: Not detected, Low, Medium, High
-            --reliability: Supportive, Uncertain
-          if cancer:
-            --cancer: Cancer tissues
-        --not_mapped: true/false if your output file should contain not-mapped and not-match IDs 
-        --output: output filename \n")
-    q(save="no")
-  }
-  
-  # Parse arguments
-  parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
-  argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
-  args <- as.list(as.character(argsDF$V2))
-  names(args) <- argsDF$V1
-  
-  #save(args,file = "/home/dchristiany/proteore_project/ProteoRE/tools/select_annotate_tissue/args.rda")
-  #load("/home/dchristiany/proteore_project/ProteoRE/tools/select_annotate_tissue/args.rda")
-  
-  # Extract input
-  input_type = args$input_type
-  if (input_type == "list") {
-    list_id = strsplit(args$input, "[ \t\n]+")[[1]]
-  } else if (input_type == "file") {
-    filename = args$input
-    column_number = as.numeric(gsub("c", "" ,args$column_number))
-    header = str2bool(args$header)
-    file = read_file(filename, header)
-    list_id = sapply(strsplit(file[,column_number], ";"), "[", 1)
-  }
-  input = list_id
-
-  # Read reference file
-  reference_file = read_file(args$ref_file, TRUE)
-
-  # Extract other options
-  atlas = args$atlas
-  not_mapped_option = str2bool(args$not_mapped)
-  if (atlas=="normal") {
-    tissue = strsplit(args$tissue, ",")[[1]]
-    level = strsplit(args$level, ",")[[1]]
-    reliability = strsplit(args$reliability, ",")[[1]]
-    # Calculation
-    res = annot.HPAnorm(input, reference_file, tissue, level, reliability, not_mapped_option)
-  } else if (atlas=="cancer") {
-    cancer = strsplit(args$cancer, ",")[[1]]
-    # Calculation
-    res = annot.HPAcancer(input, reference_file, cancer, not_mapped_option)
-  }
-  
-  # Write output
-  output = args$output
-  res <- apply(res, c(1,2), function(x) gsub("^$|^ $", NA, x))
-  write.table(res, output, sep = "\t", quote = FALSE, row.names = FALSE)
-}
-
-main()
--- a/sel_ann_hpa.xml	Thu Dec 06 10:07:13 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,252 +0,0 @@
-<tool id="sel_ann_hpa" name="Get human expression data" version="2018.12.06.1">
-    <description>by tissue (normal or tumor tissue Human Protein Atlas)
-    </description>
-    <requirements>
-    </requirements>
-    <stdio>
-        <exit_code range="1:" />
-    </stdio>
-    <command interpreter="Rscript">
-        $__tool_directory__/sel_ann_hpa.R
-        --ref_file="$__tool_directory__/$options.proteinatlas"
-        --input_type="$input.ids"
-        #if $input.ids == "list"
-            --input="$input.list"
-        #else
-            --input="$input.file"
-            --column_number="$input.ncol"
-            --header="$input.header"
-        #end if
-
-        #if $options.database == "normal"
-            --atlas="normal"
-            --tissue="$options.normal_tissue"
-            --level="$options.level"
-            --reliability="$options.reliability"
-        #else if $options.database == "tumor"
-            --atlas="cancer"
-            --cancer="$options.cancer_tissue"
-        #end if
-        --not_mapped="$not_mapped"
-        --output="$hpa_output"
-    </command>
-       <inputs>
-           <conditional name="input" >
-            <param name="ids" type="select" label="Provide your identifiers" help="Copy/paste or ID list from a file (e.g. table)" >
-                <option value="list">Copy/paste your identifiers</option>
-                <option value="file" selected="true">Input file containing your identifiers</option>
-            </param>
-            <when value="list" >
-                <param name="list" type="text" label="Copy/paste your identifiers" help='IDs must be separated by spaces into the form field, for example: ENSG00000174876 ENSG00000178372 ENSG00000159763' >
-                    <sanitizer>
-                        <valid initial="string.printable">
-                            <remove value="&apos;"/>
-                        </valid>
-                        <mapping initial="none">
-                            <add source="&apos;" target="__sq__"/>
-                        </mapping>
-                    </sanitizer>
-                </param>
-            </when>
-            <when value="file" >
-                <param name="file" type="data" format="txt,tabular" label="Choose a file that contains your list of ENSG IDs" help="" />
-                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?" />
-                <param name="ncol" type="text" value="c1" label="The column number of ENSG IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />                
-            </when>
-        </conditional>
-        <conditional name="options">
-            <param name="database" type="select" label="Human Protein Atlas (data from normal or tumor tissue)">
-                <option value="normal">Human Normal Tissue</option>
-                <option value="tumor">Human Tumor Tissue</option>
-            </param>
-            <when value="normal">
-                <param name="proteinatlas" type="select" label="Human Protein Atlas" >
-                    <!--options from_file="proteinatlas.loc" >
-                        <column name="name" index="1"/>
-                        <column name="value" index="2"/>
-                        <filter type="remove_value" key="name" value="Full Human Protein Atlas (23/10/18)"/>
-                        <filter type="remove_value" key="name" value="HPA Tumor Tissue (23/10/18)"/>
-	                </options-->
-                    <option value="tool-data/HPA_normal_tissue_23-10-2018.tsv">HPA Normal Tissue (23/10/18)</option>
-	            </param>
-                <param name="normal_tissue" type="select" label="Select tissue(s)" multiple="True" display="checkboxes" optional="False">
-                    <option value="adrenal gland" >Adrenal gland</option>
-                    <option value="appendix" >Appendix</option>
-                    <option value="bone marrow" >Bone marrow</option>
-                    <option value="breast" >Breast</option>
-                    <option value="bronchus" >Bronchus</option>
-                    <option value="caudate" >Caudate</option>
-                    <option value="cerebellum" >Cerebellum</option>
-                    <option value="cerebral cortex" >Cerebral cortex</option>
-                    <option value="cervix" >Cervix</option>
-                    <option value="colon" >Colon</option>
-                    <option value="duodenum" >Duodenum</option>
-                    <option value="endometrium 1" >Endometrium 1</option>
-                    <option value="endometrium 2" >Endometrium 2</option>
-                    <option value="epididymis" >Epididymis</option>
-                    <option value="esophagus" >Esophagus</option>
-                    <option value="fallopian tube" >Fallopian tube</option>
-                    <option value="gallbladder" >Gallbladder</option>
-                    <option value="heart muscle" >Heart muscle</option>
-                    <option value="hippocampus" >Hippocampus</option>
-                    <option value="kidney" >Kidney</option>
-                    <option value="liver" >Liver</option>
-                    <option value="lung" >Lung</option>
-                    <option value="lymph node" >Lymph node</option>
-                    <option value="nasopharynx" >Nasopharynx</option>
-                    <option value="oral mucosa" >Oral mucosa</option>
-                    <option value="ovary" >Ovary</option>
-                    <option value="pancreas" >Pancreas</option>
-                    <option value="parathyroid gland" >Parathyroid gland</option>
-                    <option value="placenta" >Placenta</option>
-                    <option value="prostate" >Prostate</option>
-                    <option value="rectum" >Rectum</option>
-                    <option value="salivary gland" >Salivary gland</option>
-                    <option value="seminal vesicle" >Seminal vesicle</option>
-                    <option value="skeletal muscle" >Skeletal muscle</option>
-                    <option value="skin 1" >Skin 1</option>
-                    <option value="skin 2" >Skin 2</option>
-                    <option value="small intestine" >Small intestine</option>
-                    <option value="smooth muscle" >Smooth muscle</option>
-                    <option value="soft tissue 1" >Soft tissue 1</option>
-                    <option value="soft tissue 2" >Soft tissue 2</option>
-                    <option value="spleen" >Spleen</option>
-                    <option value="stomach 1" >Stomach 1</option>
-                    <option value="stomach 2" >Stomach 2</option>
-                    <option value="testis" >Testis</option>
-                    <option value="thyroid gland" >Thyroid gland</option>
-                    <option value="tonsil" >Tonsil</option>
-                    <option value="urinary bladder" >Urinary bladder</option>
-                    <option value="vagina" >Vagina</option>
-                </param>
-                <param name="level" type="select" label="Expression level" display="checkboxes" multiple="True" optional="False">
-                    <option value="High" selected="true">High</option>
-                    <option value="Medium">Medium</option>
-                    <option value="Low">Low</option>
-                    <option value="Not detected">Not detected</option>
-                </param>
-                <param name="reliability" type="select" label="Reliability score" display="checkboxes" multiple="True" optional="False">
-                    <option value="Enhanced" selected="true">Enhanced</option>
-                    <option value="Supported" selected="true">Supported</option>
-                    <option value="Approved">Approved</option>
-                    <option value="Uncertain">Uncertain</option>
-                </param>
-            </when>
-            <when value="tumor">
-                <param name="proteinatlas" type="select" label="Human Protein Atlas" >
-                    <!--options from_file="proteinatlas.loc" >
-		                <column name="name" index="1" />
-		                <column name="value" index="2" />
-		                <filter type="remove_value" meta_ref="proteinatlas" key="name" value="Full Human Protein Atlas (23/10/18)" />
-                        <filter type="remove_value" meta_ref="proteinatlas" key="name" value="HPA Normal Tissue (23/10/18)" />
-	                </options-->
-                    <option value="tool-data/HPA_pathology_23-10-2018.tsv">HPA Tumor Tissue (23/10/18)</option>
-	            </param>
-                <param name="cancer_tissue" type="select" label="Keep and annotate genes present in the following tissue(s)" multiple="True" display="checkboxes" optional="False">
-                    <option value="breast cancer" >Breast cancer</option>
-                    <option value="carcinoid" >Carcinoid</option>
-                    <option value="cervical cancer" >Cervical cancer</option>
-                    <option value="colorectal cancer" >Colorectal cancer</option>
-                    <option value="endometrial cancer" >Endometrial cancer</option>
-                    <option value="glioma" >Glioma</option>
-                    <option value="head and neck cancer" >Head and neck cancer</option>
-                    <option value="liver cancer" >Liver cancer</option>
-                    <option value="lung cancer" >Lung cancer</option>
-                    <option value="lymphoma" >Lymphoma</option>
-                    <option value="melanoma" >Melanoma</option>
-                    <option value="ovarian cancer" >Ovarian cancer</option>
-                    <option value="pancreatic cancer" >Pancreatic cancer</option>
-                    <option value="prostate cancer" >Prostate cancer</option>
-                    <option value="renal cancer" >Renal cancer</option>
-                    <option value="skin cancer" >Skin cancer</option>
-                    <option value="stomach cancer" >Stomach cancer</option>
-                    <option value="testis cancer" >Testis cancer</option>
-                    <option value="thyroid cancer" >Thyroid cancer</option>
-                    <option value="urothelial cancer" >Urothelial cancer</option>
-                </param>
-            </when>
-        </conditional>
-        <param name="not_mapped" type="boolean" truevalue="true" falsevalue="false" label="Would you like to include in output the IDs that do not match criteria or not mapped in HPA?" checked="true"/>
-    </inputs>
-    <outputs>
-        <data name="hpa_output" format="tsv" label="" />
-    </outputs>
-    <tests>
-        <test>
-            <conditional name="input">
-                <param name="ids" value="file"/>
-                <param name="file" value="ID_Converter_FKW_Lacombe_et_al_2017_OK.txt"/>
-                <param name="header" value="true"/>
-                <param name="ncol" value="c8"/>
-            </conditional>
-            <conditional name="options">
-                <param name="database" value="normal"/>
-                <param name="proteinatlas" value="normal_tissue.tsv"/>
-                <param name="normal_tissue" value="bronchus,lung,nasopharynx,salivary gland"/>
-                <param name="level" value="Not detected,Medium,High,Low"/>
-                <param name="reliability" value="Approved,Supported,Uncertain"/>
-            </conditional>
-            <param name="not_mapped" value="true" />
-            <output name="hpa_output" file="Expres_levels_Lacombe_et_al_2017_OK.txt"/>
-        </test>
-    </tests>
-    <help><![CDATA[
-This tool retrieve information from Human Protein Atlas (https://www.proteinatlas.org/) 
-regarding the expression profiles of human genes both on the mRNA and protein level. 
-
-A list of ENSG (Ensembl gene) IDs must be entered (either via a copy/paste or by choosing a file), 
-if it's not the case, please use the ID_Convert tool from ProteoRE.
-
-The resources from Human Protein Atlas that can be queried are the following: 
-
-* **Human normal tissue data**: expression profiles for proteins in human tissues based on immunohistochemisty using tissue micro arrays.
-
-  The tab-separated file includes Ensembl gene identifier ("Gene"), tissue name ("Tissue"), annotated cell type ("Cell type"), expression value ("Level"), and the gene reliability of the expression value ("Reliability"). 
-
-  The data is based on The Human Protein Atlas version 18 and Ensembl version 88.38.
-
-* **Human tumor tissue data**: staining profiles for proteins in human tumor tissue based on immunohistochemisty using tissue micro arrays and log-rank P value for Kaplan-Meier analysis of correlation between mRNA expression level and patient survival. 
-
-  The tab-separated file includes Ensembl gene identifier ("Gene"), gene name ("Gene name"), tumor name ("Cancer"), the number of patients annotated for different staining levels ("High", "Medium", "Low" & "Not detected") and log-rank p values for patient survival and mRNA correlation ("prognostic - favourable", "unprognostic - favourable", "prognostic - unfavourable", "unprognostic - unfavourable").
-
-  The data is based on The Human Protein Atlas version 18 and Ensembl version 88.38.
-
------
-
-**Reliability score**
-
-Reliability score is divided into Enhanced, Supported, Approved, or Uncertain with respect 
-to the definitions from HPA:
-
-Enhanced - One or several antibodies with non-overlapping epitopes targeting the same gene 
-have obtained enhanced validation based on orthogonal or independent antibody validation method.
-
-Supported - Consistency with RNA-seq and/or protein/gene characterization data, 
-in combination with similar staining pattern if independent antibodies are available.
-
-Approved - Consistency with RNA-seq data in combination with inconsistency with, or lack of, 
-protein/gene characterization data. Alternatively, consistency with protein/gene characterization data 
-in combination with inconsistency with RNA-seq data. If independent antibodies are available, 
-the staining pattern is partly similar or dissimilar.
-
-Uncertain - Inconsistency with, or lack of, RNA-seq and/or protein/gene characterization data, 
-in combination with dissimilar staining pattern if independent antibodies are available.
-
------
-
-.. class:: infomark
-
-**Authors**
-
-T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
-
-Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform
-
-This work has been partially funded through the French National Agency for Research (ANR) IFB project.
-
-Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
-        
-    ]]></help>
-    <citations>
-    </citations>
-</tool>
--- a/tool-data/proteinatlas.loc.sample	Thu Dec 06 10:07:13 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-#This file lists the locations and dbkeys of Human Protein Atlas files.
-#<name>	<display_name (value)>          <file_path>
-#full	Full Human Protein Atlas (23/10/18)	tool-data/HPA_full_atlas_23-10-2018.tsv
-normal_tissue	HPA Normal Tissue (23/10/18)	tool-data/HPA_normal_tissue_23-10-2018.tsv
-pathology	HPA Tumor Tissue (23/10/18)	tool-data/HPA_pathology_23-10-2018.tsv
--- a/tool_data_table_conf.xml.sample	Thu Dec 06 10:07:13 2018 -0500
+++ b/tool_data_table_conf.xml.sample	Wed Mar 06 09:19:05 2019 -0500
@@ -1,7 +1,10 @@
 <tables>
-    <!-- Location of Human Protein Atlas -->
-    <table name="proteinatlas" comment_char="#">
-      <columns>name, value, path</columns>
-      <file path="tool-data/proteinatlas.loc" />
+    <table name="proteore_protein_atlas_normal_tissue" comment_char="#">
+      <columns>id, name, tissue, value</columns>
+      <file path="tool-data/proteore_protein_atlas_normal_tissue.loc" />
+    </table>
+    <table name="proteore_protein_atlas_tumor_tissue" comment_char="#">
+      <columns>id, name, tissue, value</columns>
+      <file path="tool-data/proteore_protein_atlas_tumor_tissue.loc" />
     </table>
 </tables>
\ No newline at end of file