Mercurial > repos > proteore > proteore_expression_levels_by_tissue
changeset 3:3bedd074c485 draft
planemo upload commit c6e1fd1f68e81ce9eea6ad66adee21070f2893ef-dirty
| author | proteore |
|---|---|
| date | Wed, 06 Mar 2019 09:19:05 -0500 |
| parents | db2cd451835f |
| children | 5cd79aa7aac9 |
| files | README.rst get_expression_profiles.R get_expression_profiles.xml proteore_protein_atlas_normal_tissue.loc.sample proteore_protein_atlas_tumor_tissue.loc.sample sel_ann_hpa.R sel_ann_hpa.xml tool-data/proteinatlas.loc.sample tool_data_table_conf.xml.sample |
| diffstat | 9 files changed, 413 insertions(+), 409 deletions(-) [+] |
line wrap: on
line diff
--- a/README.rst Thu Dec 06 10:07:13 2018 -0500 +++ b/README.rst Wed Mar 06 09:19:05 2019 -0500 @@ -1,9 +1,9 @@ -Wrapper for Get expression data by tissue Tool +Wrapper for Get expression profiles by tissue Tool ================================================= **Authors** -T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR +David Christiany, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform
# get_expression_profiles.R
#
# Command-line back end of the "Get expression profiles" Galaxy tool.
# Given a list of Ensembl gene (ENSG) IDs and a Human Protein Atlas (HPA)
# reference table (normal tissue or cancer/pathology), it keeps the reference
# rows matching the requested criteria and writes them to a tab-separated
# file. Optionally, IDs that were filtered out or are absent from the
# reference are appended as placeholder rows.

# Read a tab-separated file and return its content as a data.frame.
#
# path:   path of the file to read (single string).
# header: TRUE/FALSE, whether the first line holds the column names.
#
# Stops with "File not found !" when the path does not exist, and with the
# underlying parser message for any other read failure (previously every
# failure was reported as a missing file).
read_file <- function(path, header) {
  if (length(path) != 1L || is.na(path) || !file.exists(path)) {
    stop("File not found !")
  }
  tryCatch(
    read.csv(path, header = header, sep = "\t", stringsAsFactors = FALSE,
             quote = "\"", check.names = FALSE),
    error = function(e) {
      stop("Unable to read file '", path, "': ", conditionMessage(e),
           call. = FALSE)
    }
  )
}

# Convert a textual flag to a logical value.
#
# Returns TRUE for "t"/"true", FALSE for "f"/"false" (case-insensitive) and
# NULL for anything else, so callers must check for NULL before using the
# result in a condition.
str2bool <- function(x) {
  value <- tolower(x)
  if (any(c("t", "true") %in% value)) {
    TRUE
  } else if (any(c("f", "false") %in% value)) {
    FALSE
  } else {
    NULL
  }
}

# Stop with a clear message when `reference` lacks one of `required` columns.
check_columns <- function(reference, required) {
  absent <- setdiff(required, colnames(reference))
  if (length(absent) > 0) {
    stop("Reference file is missing column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }
  invisible(TRUE)
}

# Build one placeholder row per ID, shaped like `reference`.
#
# The ID goes into the first column (presumed to be the "Gene" column, as in
# the HPA files -- verify if another reference layout is used) and every
# other column is set to `fill`.
placeholder_rows <- function(ids, reference, fill) {
  filler <- matrix(fill, nrow = length(ids), ncol = ncol(reference) - 1)
  rows <- as.data.frame(cbind(ids, filler), stringsAsFactors = FALSE)
  names(rows) <- colnames(reference)
  rows
}

# Select HPA normal-tissue annotations for a set of genes.
#
# input:             character vector of IDs, compared against the "Gene"
#                    column.
# HPA_normal_tissue: reference data.frame with columns Gene, Tissue, Level
#                    and Reliability.
# tissue:            tissues to keep (values of the "Tissue" column).
# level:             expression levels to keep (values of "Level").
# reliability:       reliability scores to keep (values of "Reliability").
# not_mapped_option: when TRUE, IDs present in the reference but removed by
#                    the filters are appended with "no match" in every other
#                    column, and IDs absent from the reference are appended
#                    with NA in every other column.
#
# Returns a data.frame with the same columns as HPA_normal_tissue.
annot.HPAnorm <- function(input, HPA_normal_tissue, tissue, level, reliability,
                          not_mapped_option) {
  check_columns(HPA_normal_tissue, c("Gene", "Tissue", "Level", "Reliability"))

  in_input <- HPA_normal_tissue[["Gene"]] %in% input
  dat <- HPA_normal_tissue[in_input, , drop = FALSE]
  keep <- dat[["Tissue"]] %in% tissue &
    dat[["Level"]] %in% level &
    dat[["Reliability"]] %in% reliability
  res <- dat[keep, , drop = FALSE]

  if (not_mapped_option) {
    known_ids <- unique(dat[["Gene"]])

    # IDs found in the reference but with no row surviving the filters.
    filtered_out <- setdiff(intersect(input, known_ids), unique(res[["Gene"]]))
    if (length(filtered_out) > 0) {
      res <- rbind(res,
                   placeholder_rows(filtered_out, HPA_normal_tissue, "no match"))
    }

    # IDs that do not appear in the reference at all.
    unmapped <- setdiff(input, known_ids)
    if (length(unmapped) > 0) {
      res <- rbind(res, placeholder_rows(unmapped, HPA_normal_tissue, NA))
    }
  }

  res
}

# Select HPA cancer (pathology) annotations for a set of genes.
#
# input:             character vector of IDs, compared against the "Gene"
#                    column.
# HPA_cancer_tissue: reference data.frame with columns Gene and Cancer.
# cancer:            cancer types to keep (values of the "Cancer" column).
# not_mapped_option: when TRUE, IDs absent from the reference are appended
#                    with NA in every other column.
#
# Returns a data.frame with the same columns as HPA_cancer_tissue.
annot.HPAcancer <- function(input, HPA_cancer_tissue, cancer,
                            not_mapped_option) {
  check_columns(HPA_cancer_tissue, c("Gene", "Cancer"))

  in_input <- HPA_cancer_tissue[["Gene"]] %in% input
  dat <- HPA_cancer_tissue[in_input, , drop = FALSE]
  res <- dat[dat[["Cancer"]] %in% cancer, , drop = FALSE]

  if (not_mapped_option) {
    unmapped <- setdiff(input, unique(dat[["Gene"]]))
    # Guarded so that an empty set does not bind a zero-row block.
    if (length(unmapped) > 0) {
      res <- rbind(res, placeholder_rows(unmapped, HPA_cancer_tissue, NA))
    }
  }

  res
}

# Print the command-line usage.
print_help <- function() {
  cat("Selection and Annotation HPA
    Arguments:
        --ref_file: HPA normal/cancer tissue file path
        --input_type: type of input (list of id or filename)
        --input: list of IDs in ENSG format
        --column_number: the column number which you would like to apply...
        --header: true/false if your file contains a header
        --atlas: normal/cancer
        if normal:
            --tissue: list of tissues
            --level: Not detected, Low, Medium, High
            --reliability: Enhanced, Supported, Approved, Uncertain
        if cancer:
            --cancer: Cancer tissues
        --not_mapped: true/false if your output file should contain not-mapped and not-match IDs
        --output: output filename \n")
}

# Turn c("--key=value", ...) into a named list.
#
# Only the first "=" separates key from value, so values that themselves
# contain "=" (e.g. some file paths) are kept intact. Arguments without "="
# get an NA value.
parse_args <- function(raw) {
  stripped <- sub("^--", "", raw)
  eq <- regexpr("=", stripped, fixed = TRUE)
  has_value <- eq > 0
  keys <- ifelse(has_value, substr(stripped, 1, eq - 1), stripped)
  values <- ifelse(has_value,
                   substr(stripped, eq + 1, nchar(stripped)),
                   NA_character_)
  as.list(setNames(values, keys))
}

# Fetch a mandatory argument by exact name (no partial matching).
get_arg <- function(args, name) {
  value <- args[[name]]
  if (is.null(value) || is.na(value)) {
    stop("Missing required argument --", name, call. = FALSE)
  }
  value
}

# Fetch a mandatory true/false argument.
get_flag <- function(args, name) {
  flag <- str2bool(get_arg(args, name))
  if (is.null(flag)) {
    stop("Argument --", name, " must be true or false", call. = FALSE)
  }
  flag
}

# Replace cells that are empty or a single space with NA, column by column.
#
# Working per column (instead of apply() over the whole data.frame) avoids the
# as.matrix() coercion, which reformats numeric columns, and keeps the column
# names even when the table has no rows.
blank_to_na <- function(df) {
  for (j in seq_along(df)) {
    column <- df[[j]]
    if (is.factor(column)) {
      column <- as.character(column)
    }
    if (is.character(column)) {
      column[column %in% c("", " ")] <- NA
      df[[j]] <- column
    }
  }
  df
}

# Collect the IDs to annotate, either from the command line or from a file.
read_ids <- function(args) {
  input_type <- get_arg(args, "input_type")
  if (input_type == "list") {
    ids <- strsplit(get_arg(args, "input"), "[ \t\n]+")[[1]]
    # Leading whitespace yields an empty first token; it is not an ID.
    ids[nzchar(ids)]
  } else if (input_type == "file") {
    header <- get_flag(args, "header")
    file <- read_file(get_arg(args, "input"), header)
    column_number <- suppressWarnings(
      as.numeric(gsub("c", "", get_arg(args, "column_number")))
    )
    if (is.na(column_number) || column_number < 1 ||
        column_number > ncol(file)) {
      stop("Invalid --column_number: ", args[["column_number"]], call. = FALSE)
    }
    # Cells may hold several ";"-separated IDs; only the first one is used.
    cells <- strsplit(as.character(file[[column_number]]), ";")
    vapply(cells, function(parts) parts[1], character(1))
  } else {
    stop("Unknown --input_type: ", input_type, call. = FALSE)
  }
}

# Entry point: parse the command line, run the selection, write the result.
main <- function() {
  args <- commandArgs(TRUE)
  if (length(args) < 1 || "--help" %in% args) {
    print_help()
    q(save = "no")
  }
  args <- parse_args(args)

  input <- read_ids(args)
  reference_file <- read_file(get_arg(args, "ref_file"), TRUE)
  not_mapped_option <- get_flag(args, "not_mapped")

  atlas <- get_arg(args, "atlas")
  if (atlas == "normal") {
    tissue <- strsplit(get_arg(args, "tissue"), ",")[[1]]
    level <- strsplit(get_arg(args, "level"), ",")[[1]]
    reliability <- strsplit(get_arg(args, "reliability"), ",")[[1]]
    res <- annot.HPAnorm(input, reference_file, tissue, level, reliability,
                         not_mapped_option)
  } else if (atlas == "cancer") {
    cancer <- strsplit(get_arg(args, "cancer"), ",")[[1]]
    res <- annot.HPAcancer(input, reference_file, cancer, not_mapped_option)
  } else {
    stop("Unknown --atlas: ", atlas, call. = FALSE)
  }

  write.table(blank_to_na(res), get_arg(args, "output"), sep = "\t",
              quote = FALSE, row.names = FALSE)
}

# Run only when executed as a script (e.g. through Rscript), so that the
# functions above can be source()d without side effects.
if (sys.nframe() == 0L) {
  main()
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/get_expression_profiles.xml Wed Mar 06 09:19:05 2019 -0500 @@ -0,0 +1,254 @@ +<tool id="sel_ann_hpa" name="Get expression profiles" version="2019.03.06"> + <description>by (normal or tumor) tissue/cell type [Human Protein Atlas] + </description> + <requirements> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command interpreter="Rscript"> + $__tool_directory__/get_expression_profiles.R + + #if "protein_atlas" in str($options.proteinatlas).split("/") + --ref_file="$options.proteinatlas" + #else + --ref_file="$__tool_directory__/$options.proteinatlas" + #end if + + --input_type="$input.ids" + #if $input.ids == "list" + --input="$input.list" + #else + --input="$input.file" + --column_number="$input.ncol" + --header="$input.header" + #end if + + #if $options.database == "normal" + --atlas="normal" + --tissue="$options.normal_tissue" + --level="$options.level" + --reliability="$options.reliability" + #else if $options.database == "tumor" + --atlas="cancer" + --cancer="$options.cancer_tissue" + #end if + --not_mapped="$not_mapped" + --output="$hpa_output" + </command> + <inputs> + <conditional name="input" > + <param name="ids" type="select" label="Enter your IDs (ENSG IDs only)" help="Copy/paste or from a file (e.g. 
table)" > + <option value="list">Copy/paste your IDs</option> + <option value="file" selected="true">Input file containing your IDs</option> + </param> + <when value="list" > + <param name="list" type="text" label="Copy/paste your IDs" help='IDs must be separated by spaces into the form field, for example: ENSG00000174876 ENSG00000178372 ENSG00000159763' > + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + <mapping initial="none"> + <add source="'" target="__sq__"/> + </mapping> + </sanitizer> + </param> + </when> + <when value="file" > + <param name="file" type="data" format="txt,tabular" label="" help="" /> + <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" /> + <param name="ncol" type="text" value="c1" label="Column number of IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on'> + <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator> + </param> + </when> + </conditional> + <conditional name="options"> + <param name="database" type="select" label="Human Protein Atlas (normal or tumor tissue)"> + <option value="normal">Human Normal Tissue</option> + <option value="tumor">Human Tumor Tissue</option> + </param> + <when value="normal"> + <param name="proteinatlas" type="select" label="Normal tissue HPA version" > + <options from_data_table="proteore_protein_atlas_normal_tissue"/> + </param> + <param name="normal_tissue" type="select" label="Select tissue(s)" multiple="True" display="checkboxes" optional="False"> + <option value="adrenal gland" >Adrenal gland</option> + <option value="appendix" >Appendix</option> + <option value="bone marrow" >Bone marrow</option> + <option value="breast" >Breast</option> + <option value="bronchus" >Bronchus</option> + <option value="caudate" >Caudate</option> + <option value="cerebellum" 
>Cerebellum</option> + <option value="cerebral cortex" >Cerebral cortex</option> + <option value="cervix" >Cervix</option> + <option value="colon" >Colon</option> + <option value="duodenum" >Duodenum</option> + <option value="endometrium 1" >Endometrium 1</option> + <option value="endometrium 2" >Endometrium 2</option> + <option value="epididymis" >Epididymis</option> + <option value="esophagus" >Esophagus</option> + <option value="fallopian tube" >Fallopian tube</option> + <option value="gallbladder" >Gallbladder</option> + <option value="heart muscle" >Heart muscle</option> + <option value="hippocampus" >Hippocampus</option> + <option value="kidney" >Kidney</option> + <option value="liver" >Liver</option> + <option value="lung" >Lung</option> + <option value="lymph node" >Lymph node</option> + <option value="nasopharynx" >Nasopharynx</option> + <option value="oral mucosa" >Oral mucosa</option> + <option value="ovary" >Ovary</option> + <option value="pancreas" >Pancreas</option> + <option value="parathyroid gland" >Parathyroid gland</option> + <option value="placenta" >Placenta</option> + <option value="prostate" >Prostate</option> + <option value="rectum" >Rectum</option> + <option value="salivary gland" >Salivary gland</option> + <option value="seminal vesicle" >Seminal vesicle</option> + <option value="skeletal muscle" >Skeletal muscle</option> + <option value="skin 1" >Skin 1</option> + <option value="skin 2" >Skin 2</option> + <option value="small intestine" >Small intestine</option> + <option value="smooth muscle" >Smooth muscle</option> + <option value="soft tissue 1" >Soft tissue 1</option> + <option value="soft tissue 2" >Soft tissue 2</option> + <option value="spleen" >Spleen</option> + <option value="stomach 1" >Stomach 1</option> + <option value="stomach 2" >Stomach 2</option> + <option value="testis" >Testis</option> + <option value="thyroid gland" >Thyroid gland</option> + <option value="tonsil" >Tonsil</option> + <option value="urinary bladder" 
>Urinary bladder</option> + <option value="vagina" >Vagina</option> + </param> + <param name="level" type="select" label="Expression level" display="checkboxes" multiple="True" optional="False"> + <option value="High" selected="true">High</option> + <option value="Medium">Medium</option> + <option value="Low">Low</option> + <option value="Not detected">Not detected</option> + </param> + <param name="reliability" type="select" label="Reliability score" display="checkboxes" multiple="True" optional="False"> + <option value="Enhanced" selected="true">Enhanced</option> + <option value="Supported" selected="true">Supported</option> + <option value="Approved">Approved</option> + <option value="Uncertain">Uncertain</option> + </param> + </when> + <when value="tumor"> + <param name="proteinatlas" type="select" label="Tumor tissue HPA version" > + <options from_data_table="proteore_protein_atlas_tumor_tissue"/> + </param> + <param name="cancer_tissue" type="select" label="Select cancer tissue(s)" multiple="True" display="checkboxes" optional="False"> + <option value="breast cancer" >Breast cancer</option> + <option value="carcinoid" >Carcinoid</option> + <option value="cervical cancer" >Cervical cancer</option> + <option value="colorectal cancer" >Colorectal cancer</option> + <option value="endometrial cancer" >Endometrial cancer</option> + <option value="glioma" >Glioma</option> + <option value="head and neck cancer" >Head and neck cancer</option> + <option value="liver cancer" >Liver cancer</option> + <option value="lung cancer" >Lung cancer</option> + <option value="lymphoma" >Lymphoma</option> + <option value="melanoma" >Melanoma</option> + <option value="ovarian cancer" >Ovarian cancer</option> + <option value="pancreatic cancer" >Pancreatic cancer</option> + <option value="prostate cancer" >Prostate cancer</option> + <option value="renal cancer" >Renal cancer</option> + <option value="skin cancer" >Skin cancer</option> + <option value="stomach cancer" >Stomach 
cancer</option> + <option value="testis cancer" >Testis cancer</option> + <option value="thyroid cancer" >Thyroid cancer</option> + <option value="urothelial cancer" >Urothelial cancer</option> + </param> + </when> + </conditional> + <param name="not_mapped" type="boolean" truevalue="true" falsevalue="false" label="Keep IDs not found in HPA?" checked="true"/> + </inputs> + <outputs> + <data name="hpa_output" format="tsv" label="" /> + </outputs> + <tests> + <test> + <conditional name="input"> + <param name="ids" value="file"/> + <param name="file" value="ID_Converter_FKW_Lacombe_et_al_2017_OK.txt"/> + <param name="header" value="true"/> + <param name="ncol" value="c8"/> + </conditional> + <conditional name="options"> + <param name="database" value="normal"/> + <param name="proteinatlas" value="normal_tissue.tsv"/> + <param name="normal_tissue" value="bronchus,lung,nasopharynx,salivary gland"/> + <param name="level" value="Not detected,Medium,High,Low"/> + <param name="reliability" value="Approved,Supported,Uncertain"/> + </conditional> + <param name="not_mapped" value="true" /> + <output name="hpa_output" file="Expres_levels_Lacombe_et_al_2017_OK.txt"/> + </test> + </tests> + <help><![CDATA[ + +**Description** + +This tool allows to retrieve expression profiles (normal or tumor tissue) from Human Protein Atlas (https://www.proteinatlas.org/) + +**Input** + +A list of ENSG (Ensembl gene) IDs must be entered (either via a copy/paste or by choosing a file); if it's not the case, please use the ID_Converter tool of ProteoRE. + +----- + +**Parameters** + +"Human Protein Atlas (normal or tumor tissue)": two resources are currently available + +* **Human normal tissue data**: expression profiles for proteins in human tissues based on immunohistochemisty using tissue micro arrays. 
+ +**Output** will be in the form: a tab-separated file that includes Ensembl gene identifier ("Gene"), tissue name ("Tissue"), annotated cell type ("Cell type"), expression value ("Level"), and the gene reliability of the expression value ("Reliability"). + +* **Human tumor tissue data**: staining profiles for proteins in human tumor tissue based on immunohistochemistry using tissue micro arrays and log-rank P value for Kaplan-Meier analysis of correlation between mRNA expression level and patient survival. + +**Output** will be in the form: a tab-separated file that includes Ensembl gene identifier ("Gene"), gene name ("Gene name"), tumor name ("Cancer"), the number of patients annotated for different staining levels ("High", "Medium", "Low" & "Not detected") and log-rank p values for patient survival and mRNA correlation ("prognostic - favourable", "unprognostic - favourable", "prognostic - unfavourable", "unprognostic - unfavourable"). + +"Select tissue(s)": information from more than one tissue can be retrieved + +"Keep IDs not found in HPA?": ENSG IDs not found in Human Protein Atlas will be returned in the output file in the form of "NA" (default is "Yes") + +----- + +**Reliability score (only for normal tissue)** + +Reliability score is divided into Enhanced, Supported, Approved, or Uncertain with respect to the definitions from HPA: + +Enhanced - One or several antibodies with non-overlapping epitopes targeting the same gene have obtained enhanced validation based on orthogonal or independent antibody validation method. + +Supported - Consistency with RNA-seq and/or protein/gene characterization data, in combination with similar staining pattern if independent antibodies are available. + +Approved - Consistency with RNA-seq data in combination with inconsistency with, or lack of, protein/gene characterization data. Alternatively, consistency with protein/gene characterization data in combination with inconsistency with RNA-seq data. 
If independent antibodies are available, the staining pattern is partly similar or dissimilar. + +Uncertain - Inconsistency with, or lack of, RNA-seq and/or protein/gene characterization data, in combination with dissimilar staining pattern if independent antibodies are available. + +----- + +**Data sources (release date)** + +Both normal and tumor tissues data are based on the Human Protein Atlas version 18 and Ensembl version 88.38. + +----- + +.. class:: infomark + +**Authors** + +David Christiany, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR + +Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR + +This work has been partially funded through the French National Agency for Research (ANR) IFB project. + +Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool. + + ]]></help> + <citations> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/proteore_protein_atlas_normal_tissue.loc.sample Wed Mar 06 09:19:05 2019 -0500 @@ -0,0 +1,2 @@ +#<id> <name> <tissue> <value> +HPA_normal_tissue_19-07-2018 HPA normal tissue 19/07/2018 HPA_normal_tissue /tool-data/HPA_normal_tissue_23-10-2018.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/proteore_protein_atlas_tumor_tissue.loc.sample Wed Mar 06 09:19:05 2019 -0500 @@ -0,0 +1,2 @@ +#<id> <name> <tissue> <value> +HPA_pathology_19-07-2018 HPA pathology 19/07/2018 HPA_pathology /tool-data/HPA_pathology_23-10-2018.tsv
--- a/sel_ann_hpa.R Thu Dec 06 10:07:13 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,146 +0,0 @@ -# Read file and return file content as data.frame -read_file <- function(path,header){ - file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE) - if (inherits(file,"try-error")){ - stop("File not found !") - }else{ - return(file) - } -} - -str2bool <- function(x){ - if (any(is.element(c("t","true"),tolower(x)))){ - return (TRUE) - }else if (any(is.element(c("f","false"),tolower(x)))){ - return (FALSE) - }else{ - return(NULL) - } -} - -# input has to be a list of IDs in ENSG format -# tissue is one of unique(HPA.normal.tissue$Tissue) -# level is one, or several, or 0 (=ALL) of "Not detected", "Medium", "High", "Low" -# reliability is one, or several, or 0 (=ALL) of "Approved", "Supported", "Uncertain" -annot.HPAnorm<-function(input, HPA_normal_tissue, tissue, level, reliability, not_mapped_option) { - dat <- subset(HPA_normal_tissue, Gene %in% input) - res.Tissue<-subset(dat, Tissue %in% tissue) - res.Level<-subset(res.Tissue, Level %in% level) - res.Rel<-subset(res.Level, Reliability %in% reliability) - - if (not_mapped_option) { - if (length(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)))>0) { - not_match_IDs <- matrix(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)), ncol = 1, nrow = length(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)))) - not.match <- matrix("not match", ncol = ncol(HPA_normal_tissue) - 1, nrow = length(not_match_IDs)) - not.match <- cbind(not_match_IDs, unname(not.match)) - colnames(not.match) <- colnames(HPA_normal_tissue) - res <- rbind(res.Rel, not.match) - } else { - res <- res.Rel - } - - if (length(setdiff(input, unique(dat$Gene)))>0) { - not.mapped <- matrix(ncol = ncol(HPA_normal_tissue) - 1, nrow = length(setdiff(input, unique(dat$Gene)))) - not.mapped <- cbind(matrix(setdiff(input, unique(dat$Gene)), ncol 
= 1, nrow = length(setdiff(input, unique(dat$Gene)))), unname(not.mapped)) - colnames(not.mapped) <- colnames(HPA_normal_tissue) - res <- rbind(res, not.mapped) - } - - } else { - res <- res.Rel - } - - return(res) - -} - -annot.HPAcancer<-function(input, HPA_cancer_tissue, cancer, not_mapped_option) { - dat <- subset(HPA_cancer_tissue, Gene %in% input) - res.Cancer<-subset(dat, Cancer %in% cancer) - - if (not_mapped_option) { - not.mapped <- matrix(ncol=ncol(HPA_cancer_tissue)-1, nrow=length(setdiff(input, unique(dat$Gene)))) - not.mapped <- cbind(matrix(setdiff(input, unique(dat$Gene)), ncol = 1, nrow = length(setdiff(input, unique(dat$Gene)))), unname(not.mapped)) - colnames(not.mapped) <- colnames(HPA_cancer_tissue) - res <- rbind(res.Cancer, not.mapped) - } else { - res <- res.Cancer - } - return(res) -} - - -main <- function() { - args <- commandArgs(TRUE) - if(length(args)<1) { - args <- c("--help") - } - - # Help section - if("--help" %in% args) { - cat("Selection and Annotation HPA - Arguments: - --ref_file: HPA normal/cancer tissue file path - --input_type: type of input (list of id or filename) - --input: list of IDs in ENSG format - --column_number: the column number which you would like to apply... 
- --header: true/false if your file contains a header - --atlas: normal/cancer - if normal: - --tissue: list of tissues - --level: Not detected, Low, Medium, High - --reliability: Supportive, Uncertain - if cancer: - --cancer: Cancer tissues - --not_mapped: true/false if your output file should contain not-mapped and not-match IDs - --output: output filename \n") - q(save="no") - } - - # Parse arguments - parseArgs <- function(x) strsplit(sub("^--", "", x), "=") - argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) - args <- as.list(as.character(argsDF$V2)) - names(args) <- argsDF$V1 - - #save(args,file = "/home/dchristiany/proteore_project/ProteoRE/tools/select_annotate_tissue/args.rda") - #load("/home/dchristiany/proteore_project/ProteoRE/tools/select_annotate_tissue/args.rda") - - # Extract input - input_type = args$input_type - if (input_type == "list") { - list_id = strsplit(args$input, "[ \t\n]+")[[1]] - } else if (input_type == "file") { - filename = args$input - column_number = as.numeric(gsub("c", "" ,args$column_number)) - header = str2bool(args$header) - file = read_file(filename, header) - list_id = sapply(strsplit(file[,column_number], ";"), "[", 1) - } - input = list_id - - # Read reference file - reference_file = read_file(args$ref_file, TRUE) - - # Extract other options - atlas = args$atlas - not_mapped_option = str2bool(args$not_mapped) - if (atlas=="normal") { - tissue = strsplit(args$tissue, ",")[[1]] - level = strsplit(args$level, ",")[[1]] - reliability = strsplit(args$reliability, ",")[[1]] - # Calculation - res = annot.HPAnorm(input, reference_file, tissue, level, reliability, not_mapped_option) - } else if (atlas=="cancer") { - cancer = strsplit(args$cancer, ",")[[1]] - # Calculation - res = annot.HPAcancer(input, reference_file, cancer, not_mapped_option) - } - - # Write output - output = args$output - res <- apply(res, c(1,2), function(x) gsub("^$|^ $", NA, x)) - write.table(res, output, sep = "\t", quote = FALSE, row.names = FALSE) 
-} - -main()
--- a/sel_ann_hpa.xml Thu Dec 06 10:07:13 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,252 +0,0 @@ -<tool id="sel_ann_hpa" name="Get human expression data" version="2018.12.06.1"> - <description>by tissue (normal or tumor tissue Human Protein Atlas) - </description> - <requirements> - </requirements> - <stdio> - <exit_code range="1:" /> - </stdio> - <command interpreter="Rscript"> - $__tool_directory__/sel_ann_hpa.R - --ref_file="$__tool_directory__/$options.proteinatlas" - --input_type="$input.ids" - #if $input.ids == "list" - --input="$input.list" - #else - --input="$input.file" - --column_number="$input.ncol" - --header="$input.header" - #end if - - #if $options.database == "normal" - --atlas="normal" - --tissue="$options.normal_tissue" - --level="$options.level" - --reliability="$options.reliability" - #else if $options.database == "tumor" - --atlas="cancer" - --cancer="$options.cancer_tissue" - #end if - --not_mapped="$not_mapped" - --output="$hpa_output" - </command> - <inputs> - <conditional name="input" > - <param name="ids" type="select" label="Provide your identifiers" help="Copy/paste or ID list from a file (e.g. 
table)" > - <option value="list">Copy/paste your identifiers</option> - <option value="file" selected="true">Input file containing your identifiers</option> - </param> - <when value="list" > - <param name="list" type="text" label="Copy/paste your identifiers" help='IDs must be separated by spaces into the form field, for example: ENSG00000174876 ENSG00000178372 ENSG00000159763' > - <sanitizer> - <valid initial="string.printable"> - <remove value="'"/> - </valid> - <mapping initial="none"> - <add source="'" target="__sq__"/> - </mapping> - </sanitizer> - </param> - </when> - <when value="file" > - <param name="file" type="data" format="txt,tabular" label="Choose a file that contains your list of ENSG IDs" help="" /> - <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?" /> - <param name="ncol" type="text" value="c1" label="The column number of ENSG IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' /> - </when> - </conditional> - <conditional name="options"> - <param name="database" type="select" label="Human Protein Atlas (data from normal or tumor tissue)"> - <option value="normal">Human Normal Tissue</option> - <option value="tumor">Human Tumor Tissue</option> - </param> - <when value="normal"> - <param name="proteinatlas" type="select" label="Human Protein Atlas" > - <!--options from_file="proteinatlas.loc" > - <column name="name" index="1"/> - <column name="value" index="2"/> - <filter type="remove_value" key="name" value="Full Human Protein Atlas (23/10/18)"/> - <filter type="remove_value" key="name" value="HPA Tumor Tissue (23/10/18)"/> - </options--> - <option value="tool-data/HPA_normal_tissue_23-10-2018.tsv">HPA Normal Tissue (23/10/18)</option> - </param> - <param name="normal_tissue" type="select" label="Select tissue(s)" multiple="True" display="checkboxes" optional="False"> - <option value="adrenal gland" 
>Adrenal gland</option> - <option value="appendix" >Appendix</option> - <option value="bone marrow" >Bone marrow</option> - <option value="breast" >Breast</option> - <option value="bronchus" >Bronchus</option> - <option value="caudate" >Caudate</option> - <option value="cerebellum" >Cerebellum</option> - <option value="cerebral cortex" >Cerebral cortex</option> - <option value="cervix" >Cervix</option> - <option value="colon" >Colon</option> - <option value="duodenum" >Duodenum</option> - <option value="endometrium 1" >Endometrium 1</option> - <option value="endometrium 2" >Endometrium 2</option> - <option value="epididymis" >Epididymis</option> - <option value="esophagus" >Esophagus</option> - <option value="fallopian tube" >Fallopian tube</option> - <option value="gallbladder" >Gallbladder</option> - <option value="heart muscle" >Heart muscle</option> - <option value="hippocampus" >Hippocampus</option> - <option value="kidney" >Kidney</option> - <option value="liver" >Liver</option> - <option value="lung" >Lung</option> - <option value="lymph node" >Lymph node</option> - <option value="nasopharynx" >Nasopharynx</option> - <option value="oral mucosa" >Oral mucosa</option> - <option value="ovary" >Ovary</option> - <option value="pancreas" >Pancreas</option> - <option value="parathyroid gland" >Parathyroid gland</option> - <option value="placenta" >Placenta</option> - <option value="prostate" >Prostate</option> - <option value="rectum" >Rectum</option> - <option value="salivary gland" >Salivary gland</option> - <option value="seminal vesicle" >Seminal vesicle</option> - <option value="skeletal muscle" >Skeletal muscle</option> - <option value="skin 1" >Skin 1</option> - <option value="skin 2" >Skin 2</option> - <option value="small intestine" >Small intestine</option> - <option value="smooth muscle" >Smooth muscle</option> - <option value="soft tissue 1" >Soft tissue 1</option> - <option value="soft tissue 2" >Soft tissue 2</option> - <option value="spleen" 
>Spleen</option> - <option value="stomach 1" >Stomach 1</option> - <option value="stomach 2" >Stomach 2</option> - <option value="testis" >Testis</option> - <option value="thyroid gland" >Thyroid gland</option> - <option value="tonsil" >Tonsil</option> - <option value="urinary bladder" >Urinary bladder</option> - <option value="vagina" >Vagina</option> - </param> - <param name="level" type="select" label="Expression level" display="checkboxes" multiple="True" optional="False"> - <option value="High" selected="true">High</option> - <option value="Medium">Medium</option> - <option value="Low">Low</option> - <option value="Not detected">Not detected</option> - </param> - <param name="reliability" type="select" label="Reliability score" display="checkboxes" multiple="True" optional="False"> - <option value="Enhanced" selected="true">Enhanced</option> - <option value="Supported" selected="true">Supported</option> - <option value="Approved">Approved</option> - <option value="Uncertain">Uncertain</option> - </param> - </when> - <when value="tumor"> - <param name="proteinatlas" type="select" label="Human Protein Atlas" > - <!--options from_file="proteinatlas.loc" > - <column name="name" index="1" /> - <column name="value" index="2" /> - <filter type="remove_value" meta_ref="proteinatlas" key="name" value="Full Human Protein Atlas (23/10/18)" /> - <filter type="remove_value" meta_ref="proteinatlas" key="name" value="HPA Normal Tissue (23/10/18)" /> - </options--> - <option value="tool-data/HPA_pathology_23-10-2018.tsv">HPA Tumor Tissue (23/10/18)</option> - </param> - <param name="cancer_tissue" type="select" label="Keep and annotate genes present in the following tissue(s)" multiple="True" display="checkboxes" optional="False"> - <option value="breast cancer" >Breast cancer</option> - <option value="carcinoid" >Carcinoid</option> - <option value="cervical cancer" >Cervical cancer</option> - <option value="colorectal cancer" >Colorectal cancer</option> - <option 
value="endometrial cancer" >Endometrial cancer</option> - <option value="glioma" >Glioma</option> - <option value="head and neck cancer" >Head and neck cancer</option> - <option value="liver cancer" >Liver cancer</option> - <option value="lung cancer" >Lung cancer</option> - <option value="lymphoma" >Lymphoma</option> - <option value="melanoma" >Melanoma</option> - <option value="ovarian cancer" >Ovarian cancer</option> - <option value="pancreatic cancer" >Pancreatic cancer</option> - <option value="prostate cancer" >Prostate cancer</option> - <option value="renal cancer" >Renal cancer</option> - <option value="skin cancer" >Skin cancer</option> - <option value="stomach cancer" >Stomach cancer</option> - <option value="testis cancer" >Testis cancer</option> - <option value="thyroid cancer" >Thyroid cancer</option> - <option value="urothelial cancer" >Urothelial cancer</option> - </param> - </when> - </conditional> - <param name="not_mapped" type="boolean" truevalue="true" falsevalue="false" label="Would you like to include in output the IDs that do not match criteria or not mapped in HPA?" 
checked="true"/> - </inputs> - <outputs> - <data name="hpa_output" format="tsv" label="" /> - </outputs> - <tests> - <test> - <conditional name="input"> - <param name="ids" value="file"/> - <param name="file" value="ID_Converter_FKW_Lacombe_et_al_2017_OK.txt"/> - <param name="header" value="true"/> - <param name="ncol" value="c8"/> - </conditional> - <conditional name="options"> - <param name="database" value="normal"/> - <param name="proteinatlas" value="normal_tissue.tsv"/> - <param name="normal_tissue" value="bronchus,lung,nasopharynx,salivary gland"/> - <param name="level" value="Not detected,Medium,High,Low"/> - <param name="reliability" value="Approved,Supported,Uncertain"/> - </conditional> - <param name="not_mapped" value="true" /> - <output name="hpa_output" file="Expres_levels_Lacombe_et_al_2017_OK.txt"/> - </test> - </tests> - <help><![CDATA[ -This tool retrieves information from Human Protein Atlas (https://www.proteinatlas.org/) -regarding the expression profiles of human genes on both the mRNA and protein levels. - -A list of ENSG (Ensembl gene) IDs must be entered (either via a copy/paste or by choosing a file), -if it's not the case, please use the ID_Convert tool from ProteoRE. - -The resources from Human Protein Atlas that can be queried are the following: - -* **Human normal tissue data**: expression profiles for proteins in human tissues based on immunohistochemistry using tissue micro arrays. - - The tab-separated file includes Ensembl gene identifier ("Gene"), tissue name ("Tissue"), annotated cell type ("Cell type"), expression value ("Level"), and the gene reliability of the expression value ("Reliability"). - - The data is based on The Human Protein Atlas version 18 and Ensembl version 88.38. - -* **Human tumor tissue data**: staining profiles for proteins in human tumor tissue based on immunohistochemistry using tissue micro arrays and log-rank P value for Kaplan-Meier analysis of correlation between mRNA expression level and patient survival. 
- - The tab-separated file includes Ensembl gene identifier ("Gene"), gene name ("Gene name"), tumor name ("Cancer"), the number of patients annotated for different staining levels ("High", "Medium", "Low" & "Not detected") and log-rank p values for patient survival and mRNA correlation ("prognostic - favourable", "unprognostic - favourable", "prognostic - unfavourable", "unprognostic - unfavourable"). - - The data is based on The Human Protein Atlas version 18 and Ensembl version 88.38. - ------ - -**Reliability score** - -Reliability score is divided into Enhanced, Supported, Approved, or Uncertain with respect -to the definitions from HPA: - -Enhanced - One or several antibodies with non-overlapping epitopes targeting the same gene -have obtained enhanced validation based on orthogonal or independent antibody validation method. - -Supported - Consistency with RNA-seq and/or protein/gene characterization data, -in combination with similar staining pattern if independent antibodies are available. - -Approved - Consistency with RNA-seq data in combination with inconsistency with, or lack of, -protein/gene characterization data. Alternatively, consistency with protein/gene characterization data -in combination with inconsistency with RNA-seq data. If independent antibodies are available, -the staining pattern is partly similar or dissimilar. - -Uncertain - Inconsistency with, or lack of, RNA-seq and/or protein/gene characterization data, -in combination with dissimilar staining pattern if independent antibodies are available. - ------ - -.. class:: infomark - -**Authors** - -T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR - -Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform - -This work has been partially funded through the French National Agency for Research (ANR) IFB project. 
- -Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool. - - ]]></help> - <citations> - </citations> -</tool>
--- a/tool-data/proteinatlas.loc.sample Thu Dec 06 10:07:13 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -#This file lists the locations and dbkeys of Human Protein Atlas files. -#<name> <display_name (value)> <file_path> -#full Full Human Protein Atlas (23/10/18) tool-data/HPA_full_atlas_23-10-2018.tsv -normal_tissue HPA Normal Tissue (23/10/18) tool-data/HPA_normal_tissue_23-10-2018.tsv -pathology HPA Tumor Tissue (23/10/18) tool-data/HPA_pathology_23-10-2018.tsv
--- a/tool_data_table_conf.xml.sample Thu Dec 06 10:07:13 2018 -0500 +++ b/tool_data_table_conf.xml.sample Wed Mar 06 09:19:05 2019 -0500 @@ -1,7 +1,10 @@ <tables> - <!-- Location of Human Protein Atlas --> - <table name="proteinatlas" comment_char="#"> - <columns>name, value, path</columns> - <file path="tool-data/proteinatlas.loc" /> + <table name="proteore_protein_atlas_normal_tissue" comment_char="#"> + <columns>id, name, tissue, value</columns> + <file path="tool-data/proteore_protein_atlas_normal_tissue.loc" /> + </table> + <table name="proteore_protein_atlas_tumor_tissue" comment_char="#"> + <columns>id, name, tissue, value</columns> + <file path="tool-data/proteore_protein_atlas_tumor_tissue.loc" /> </table> </tables> \ No newline at end of file
