Mercurial > repos > proteore > proteore_retrieve_from_hpa
changeset 1:cfcc7a780991 draft default tip
planemo upload commit c599cfc156c77626df2b674bdfbd437b9f664ab9
| author | proteore |
|---|---|
| date | Thu, 13 Dec 2018 03:59:41 -0500 |
| parents | 6fc8d303bcf1 |
| children | |
| files | Build_tissue-specific_expression_dataset.R Build_tissue-specific_expression_dataset.xml RetrieveFromHPA.R retrieve_from_hpa.xml |
| diffstat | 4 files changed, 286 insertions(+), 286 deletions(-) [+] |
line wrap: on
line diff
# Build_tissue-specific_expression_dataset.R
#
# Command-line script that filters a Human Protein Atlas (HPA) reference table
# (immunohistochemistry "normal tissue" data or RNA-seq tissue data) and writes
# the selected rows to a tab-separated output file.
#
# Usage:
#   Rscript Build_tissue-specific_expression_dataset.R \
#     --data_source=immuno --hpa_ref=normal_tissue.tsv \
#     --tissue=lung,ovary --level=High --reliability=Approved,Supported \
#     --output=out.tsv

# Read a tab-separated HPA reference table that has a header row.
read_hpa_table <- function(hpa_ref) {
  read.table(hpa_ref, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
}

# Undo an over-eager split on "," for values that themselves contain a comma.
#
# `tokens` is a character vector obtained by splitting a comma-separated list;
# `known` is the set of valid values. When a token is not a known value but
# gluing it to the next token with "," yields a known value (for example
# "cervix" + " uterine" -> "cervix, uterine"), the two tokens are merged.
# Tokens that are already valid are never touched. Only names containing a
# single comma are recovered.
rejoin_split_values <- function(tokens, known) {
  merged <- character(0)
  n <- length(tokens)
  i <- 1L
  while (i <= n) {
    current <- tokens[i]
    if (i < n && !(current %in% known)) {
      joined <- paste(current, tokens[i + 1L], sep = ",")
      if (joined %in% known) {
        merged <- c(merged, joined)
        i <- i + 2L
        next
      }
    }
    merged <- c(merged, current)
    i <- i + 1L
  }
  merged
}

# Select rows of the HPA immunohistochemistry table.
#
# hpa_ref:     path to the reference table (columns Tissue, Level, Reliability
#              are used for filtering).
# tissue:      character vector of tissues to keep, or the single sentinel
#              string "tissue" to keep every tissue found in the table.
# level:       character vector of expression levels to keep, or the sentinel
#              "level" for all levels.
# reliability: character vector of reliability scores to keep, or the sentinel
#              "reliability" for all scores.
#
# Returns the matching rows as a data frame (all columns preserved).
select_HPAimmunohisto <- function(hpa_ref, tissue, level, reliability) {
  hpa_normal <- read_hpa_table(hpa_ref)

  # identical() keeps the test scalar: `tissue == "tissue"` on a vector of
  # several tissues is a warning in old R and an error from R 4.2 onwards.
  if (identical(tissue, "tissue")) {
    tissue <- unique(hpa_normal$Tissue)
  }
  if (identical(level, "level")) {
    level <- unique(hpa_normal$Level)
  }
  if (identical(reliability, "reliability")) {
    reliability <- unique(hpa_normal$Reliability)
  }

  keep <- hpa_normal$Tissue %in% tissue &
    hpa_normal$Level %in% level &
    hpa_normal$Reliability %in% reliability
  hpa_normal[keep, , drop = FALSE]
}

# Select rows of the HPA RNA-seq tissue table.
#
# hpa_ref: path to the reference table (column Sample is used for filtering).
# sample:  character vector of sample (tissue) names to keep.
#
# Returns the matching rows without the "Unit" column and with the "Value"
# column renamed to "Value (TPM unit)".
select_HPARNAseq <- function(hpa_ref, sample) {
  hpa_rna <- read_hpa_table(hpa_ref)

  # Sample names such as "cervix, uterine" contain the list separator, so a
  # caller that split the list on "," hands us the name in two pieces.
  sample <- rejoin_split_values(sample, unique(hpa_rna$Sample))

  keep <- hpa_rna$Sample %in% sample
  res_rna <- hpa_rna[keep, setdiff(colnames(hpa_rna), "Unit"), drop = FALSE]
  colnames(res_rna)[colnames(res_rna) == "Value"] <- "Value (TPM unit)"
  res_rna
}

# Turn c("--key=value", ...) into a named list. Only the FIRST "=" separates
# key from value, so values may contain "=" themselves (e.g. file paths).
# An argument without "=" gets the value NA.
parse_cli_args <- function(raw_args) {
  stripped <- sub("^--", "", raw_args)
  eq_pos <- regexpr("=", stripped, fixed = TRUE)
  has_value <- eq_pos > 0
  keys <- ifelse(has_value, substr(stripped, 1L, eq_pos - 1L), stripped)
  values <- ifelse(
    has_value,
    substr(stripped, eq_pos + 1L, nchar(stripped)),
    NA_character_
  )
  parsed <- as.list(values)
  names(parsed) <- keys
  parsed
}

# Fetch a mandatory option from the parsed argument list or stop with a
# readable message.
require_arg <- function(args, name) {
  value <- args[[name]]
  if (is.null(value) || is.na(value)) {
    stop("Missing required argument --", name, call. = FALSE)
  }
  value
}

# Split a comma-separated option value into its items.
split_list <- function(value) {
  strsplit(value, ",", fixed = TRUE)[[1]]
}

# Entry point: parse the command line, run the requested selection and write
# the result as a tab-separated file.
main <- function() {
  args <- commandArgs(TRUE)
  if (length(args) < 1) {
    args <- c("--help")
  }

  # Help section
  if ("--help" %in% args) {
    cat(
      "Selection and Annotation HPA",
      "    Arguments:",
      "        --data_source: immuno/rnaseq",
      "        --hpa_ref: path to reference file (normal_tissue.tsv/rna_tissue.tsv)",
      "      if immuno:",
      "        --tissue: list of tissues",
      "        --level: Not detected, Low, Medium, High",
      "        --reliability: Supported, Approved, Enhanced, Uncertain",
      "      if rnaseq:",
      "        --sample: Sample tissues",
      "        --output: output filename \n",
      sep = "\n"
    )
    q(save = "no")
  }

  # Parse arguments
  args <- parse_cli_args(args)

  # Extract options
  data_source <- require_arg(args, "data_source")
  hpa_ref <- require_arg(args, "hpa_ref")
  output <- require_arg(args, "output")

  if (data_source == "immuno") {
    tissue <- split_list(require_arg(args, "tissue"))
    level <- split_list(require_arg(args, "level"))
    reliability <- split_list(require_arg(args, "reliability"))
    res <- select_HPAimmunohisto(hpa_ref, tissue, level, reliability)
  } else if (data_source == "rnaseq") {
    sample <- split_list(require_arg(args, "sample"))
    res <- select_HPARNAseq(hpa_ref, sample)
  } else {
    # Without this branch `res` would be undefined further down.
    stop(
      "Unknown --data_source '", data_source,
      "' (expected 'immuno' or 'rnaseq')",
      call. = FALSE
    )
  }

  # Write output
  write.table(res, output, sep = "\t", quote = FALSE, row.names = FALSE)
}

# Run only when executed as a script (Rscript / R -f); when the file is
# source()d, e.g. from a test, the functions are defined without side effects.
if (sys.nframe() == 0L) {
  main()
}
<!--
  Galaxy tool wrapper for Build_tissue-specific_expression_dataset.R.
  It passes the user's selection (immunohistochemistry or RNA-seq data, plus
  the chosen tissues / levels / reliability scores) to the R script, which
  filters the bundled Human Protein Atlas reference table and writes a TSV.
  Every <option value="..."> below is compared verbatim with the "Tissue" /
  "Sample" column of the reference file, so values must match it exactly.
-->
<tool id="retrieve_from_hpa" name="Build tissue-specific expression dataset" version="2018.12.12">
<description>[Human Protein Atlas](no input required)</description>
<requirements>
    <requirement type="package" version="3.4.1">R</requirement>
</requirements>
<stdio>
    <exit_code range="1:" />
</stdio>
<command interpreter="Rscript">
    $__tool_directory__/Build_tissue-specific_expression_dataset.R
    --data_source="$input.data_source"
    #if $input.data_source == "immuno"
        --hpa_ref="$__tool_directory__/normal_tissue.tsv"
        --tissue="$input.normal_tissue"
        --level="$input.level"
        --reliability="$input.reliability"
    #else if $input.data_source == "rnaseq"
        --hpa_ref="$__tool_directory__/rna_tissue.tsv"
        --sample="$input.sample"
    #end if
    --output="$output"
</command>

<inputs>
<conditional name="input">
    <param name="data_source" type="select" label="Please choose experimental data source (antibody- or RNAseq-based)">
        <option value="immuno">Expression profiles based on immunohistochemistry</option>
        <option value="rnaseq">RNA levels based on RNA-seq</option>
    </param>
    <when value="immuno">
        <param name="normal_tissue" type="select" label="Select tissue by clicking the dropdown menu below" multiple="True" optional="False">
            <option value="adrenal gland" >Adrenal gland</option>
            <option value="appendix" >Appendix</option>
            <option value="bone marrow" >Bone marrow</option>
            <option value="breast" >Breast</option>
            <option value="bronchus" >Bronchus</option>
            <option value="caudate" >Caudate</option>
            <option value="cerebellum" >Cerebellum</option>
            <option value="cerebral cortex" >Cerebral cortex</option>
            <option value="cervix" >Cervix</option>
            <option value="colon" >Colon</option>
            <option value="duodenum" >Duodenum</option>
            <option value="endometrium 1" >Endometrium 1</option>
            <option value="endometrium 2" >Endometrium 2</option>
            <option value="epididymis" >Epididymis</option>
            <option value="esophagus" >Esophagus</option>
            <option value="fallopian tube" >Fallopian tube</option>
            <option value="gallbladder" >Gallbladder</option>
            <option value="heart muscle" >Heart muscle</option>
            <option value="hippocampus" >Hippocampus</option>
            <option value="kidney" >Kidney</option>
            <option value="liver" >Liver</option>
            <option value="lung" >Lung</option>
            <option value="lymph node" >Lymph node</option>
            <option value="nasopharynx" >Nasopharynx</option>
            <option value="oral mucosa" >Oral mucosa</option>
            <option value="ovary" >Ovary</option>
            <option value="pancreas" >Pancreas</option>
            <option value="parathyroid gland" >Parathyroid gland</option>
            <option value="placenta" >Placenta</option>
            <option value="prostate" >Prostate</option>
            <option value="rectum" >Rectum</option>
            <option value="salivary gland" >Salivary gland</option>
            <option value="seminal vesicle" >Seminal vesicle</option>
            <option value="skeletal muscle" >Skeletal muscle</option>
            <option value="skin 1" >Skin 1</option>
            <option value="skin 2" >Skin 2</option>
            <option value="small intestine" >Small intestine</option>
            <option value="smooth muscle" >Smooth muscle</option>
            <option value="soft tissue 1" >Soft tissue 1</option>
            <option value="soft tissue 2" >Soft tissue 2</option>
            <option value="spleen" >Spleen</option>
            <option value="stomach 1" >Stomach 1</option>
            <option value="stomach 2" >Stomach 2</option>
            <option value="testis" >Testis</option>
            <option value="thyroid gland" >Thyroid gland</option>
            <option value="tonsil" >Tonsil</option>
            <option value="urinary bladder" >Urinary bladder</option>
            <option value="vagina" >Vagina</option>
        </param>
        <param name="level" type="select" label="Expression level" display="checkboxes" multiple="True" optional="False">
            <option value="High" selected="true">High</option>
            <option value="Medium">Medium</option>
            <option value="Low">Low</option>
            <option value="Not detected">Not detected</option>
        </param>
        <param name="reliability" type="select" label="Reliability score" display="checkboxes" multiple="True" optional="False">
            <option value="Enhanced" selected="true">Enhanced</option>
            <option value="Supported" selected="true">Supported</option>
            <option value="Approved">Approved</option>
            <option value="Uncertain">Uncertain</option>
        </param>
    </when>
    <when value="rnaseq">
        <param name="sample" type="select" label="Keep and annotate genes present in the following tissue(s)" multiple="True" optional="False">
            <option value="adipose tissue">Adipose tissue</option>
            <option value="adrenal gland">Adrenal gland</option>
            <option value="appendix">Appendix</option>
            <option value="bone marrow">Bone marrow</option>
            <option value="breast">Breast</option>
            <option value="cerebral cortex">Cerebral cortex</option>
            <option value="cervix, uterine">Cervix, uterine</option>
            <option value="colon">Colon</option>
            <option value="duodenum">Duodenum</option>
            <option value="endometrium">Endometrium</option>
            <option value="epididymis">Epididymis</option>
            <option value="esophagus">Esophagus</option>
            <option value="fallopian tube">Fallopian tube</option>
            <option value="gallbladder">Gallbladder</option>
            <option value="heart muscle">Heart muscle</option>
            <option value="kidney">Kidney</option>
            <option value="liver">Liver</option>
            <option value="lung">Lung</option>
            <option value="lymph node">Lymph node</option>
            <option value="ovary">Ovary</option>
            <option value="pancreas">Pancreas</option>
            <option value="parathyroid gland">Parathyroid gland</option>
            <option value="placenta">Placenta</option>
            <option value="prostate">Prostate</option>
            <option value="rectum">Rectum</option>
            <option value="salivary gland">Salivary gland</option>
            <option value="seminal vesicle">Seminal vesicle</option>
            <option value="skeletal muscle">Skeletal muscle</option>
            <option value="skin">Skin</option>
            <option value="small intestine">Small intestine</option>
            <option value="smooth muscle">Smooth muscle</option>
            <option value="spleen">Spleen</option>
            <option value="stomach">Stomach</option>
            <option value="testis">Testis</option>
            <option value="thyroid gland">Thyroid gland</option>
            <option value="tonsil">Tonsil</option>
            <!-- value was misspelled "rinary bladder", which can never match a reference row -->
            <option value="urinary bladder">Urinary bladder</option>
        </param>
    </when>
</conditional>
</inputs>

<outputs>
    <data name="output" format="tsv" label=""/>
</outputs>

<tests>
    <test>
        <conditional name="input">
            <param name="data_source" value="immuno"/>
            <param name="normal_tissue" value="lung,ovary"/>
            <param name="level" value="High"/>
            <param name="reliability" value="Approved,Supported"/>
        </conditional>
        <output name="output" file="test.txt"/>
    </test>
</tests>

<help><![CDATA[

This tool retrieves information from the Human Protein Atlas (https://www.proteinatlas.org/)
regarding the expression profiles of human genes both at the mRNA and protein levels
without any input required. It can be used to:

* Select a list of proteins highly expressed in a given tissue for data mining or comparison purpose in terms of expected protein content from one tissue to another

* Build a "reference" proteome restricted to a given tissue (for instance for GO analysis as a reference background restricted to a biological sample)

The resources from Human Protein Atlas that can be queried are the following:

1. Human normal tissue data: expression profiles for proteins in human tissues based on immunohistochemistry using tissue micro arrays measured in 58 tissues and 82 cell types.

   The tab-separated file includes Ensembl gene identifier ("Gene"), tissue name ("Tissue"), annotated cell type ("Cell type"), expression value ("Level"), and the gene reliability of the expression value ("Reliability score").

   The reliability score is divided into Enhanced, Supported, Approved, or Uncertain with the following definitions:

   * Enhanced: One or several antibodies with non-overlapping epitopes targeting the same gene have obtained enhanced validation based on orthogonal or independent antibody validation method.

   * Supported: Consistency with RNA-seq and/or protein/gene characterization data, in combination with similar staining pattern if independent antibodies are available.

   * Approved: Consistency with RNA-seq data in combination with inconsistency with, or lack of, protein/gene characterization data. Alternatively, consistency with protein/gene characterization data in combination with inconsistency with RNA-seq data. If independent antibodies are available, the staining pattern is partly similar or dissimilar.

   * Uncertain: Inconsistency with, or lack of, RNA-seq and/or protein/gene characterization data, in combination with dissimilar staining pattern if independent antibodies are available.

   The data is based on The Human Protein Atlas version 18 and Ensembl version 88.38.

2. RNA levels based on RNA-seq data: RNA levels measured in 64 cell lines and 37 tissues based on RNA-seq experiments. The tab-separated file includes Ensembl gene identifier ("Gene"), analysed sample ("Sample") and transcripts per million ("Value" and "Unit").

   The data is based on The Human Protein Atlas version 18 and Ensembl version 88.38.

   For more information: https://www.proteinatlas.org/about/help

-----

.. class:: infomark

**Authors**

T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.

]]></help>

<citations>
</citations>

</tool>
--- a/RetrieveFromHPA.R Mon Nov 12 11:19:38 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,76 +0,0 @@ - - -select_HPAimmunohisto<-function(hpa_ref, tissue, level, reliability) { - HPA.normal = read.table(hpa_ref,header=TRUE,sep="\t",stringsAsFactors = FALSE) - if (tissue == "tissue") { - tissue <- unique(HPA.normal$Tissue) - } - if (level == "level") { - level <- unique(HPA.normal$Level) - } - if (reliability == "reliability") { - reliability <- unique(HPA.normal$Reliability) - } - res.imm <- subset(HPA.normal, Tissue%in%tissue & Level%in%level & Reliability%in%reliability) - return(res.imm) -} - - - -select_HPARNAseq<-function(hpa_ref, sample) { - HPA.rnaTissue = read.table(hpa_ref,header=TRUE,sep="\t",stringsAsFactors = FALSE) - res.rna <- subset(HPA.rnaTissue, Sample%in%sample, select = -c(Unit)) - colnames(res.rna)[which(colnames(res.rna) == 'Value')] <- 'Value (TPM unit)' - return(res.rna) -} - -main <- function() { - args <- commandArgs(TRUE) - if(length(args)<1) { - args <- c("--help") - } - - # Help section - if("--help" %in% args) { - cat("Selection and Annotation HPA - Arguments: - --data_source: immuno/rnaseq - --hpe_ref: path to reference file normal_tissue.tsv/rna_tissue.tsv) - if immuno: - --tissue: list of tissues - --level: Not detected, Low, Medium, High - --reliability: Supported, Approved, Enhanced, Uncertain - if rnaseq: - --sample: Sample tissues - --output: output filename \n") - q(save="no") - } - - # Parse arguments - parseArgs <- function(x) strsplit(sub("^--", "", x), "=") - argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) - args <- as.list(as.character(argsDF$V2)) - names(args) <- argsDF$V1 - - # Extract options - data_source = args$data_source - hpa_ref = args$hpa_ref - if (data_source == "immuno") { - tissue = strsplit(args$tissue, ",")[[1]] - level = strsplit(args$level, ",")[[1]] - reliability = strsplit(args$reliability, ",")[[1]] - # Calculation - res = select_HPAimmunohisto(hpa_ref, tissue, level, 
reliability) - } - else if (data_source == "rnaseq") { - sample = strsplit(args$sample, ",")[[1]] - # Calculation - res = select_HPARNAseq(hpa_ref, sample) - } - - # Write output - output = args$output - write.table(res, output, sep = "\t", quote = FALSE, row.names = FALSE) -} - -main()
--- a/retrieve_from_hpa.xml Mon Nov 12 11:19:38 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,210 +0,0 @@ -<tool id="retrieve_from_hpa" name="Tissue-specific expression data from Human Protein Atlas" version="2018.10.12"> -<description>(no input required)</description> -<requirements> - <requirement type="package" version="3.4.1">R</requirement> -</requirements> -<stdio> - <exit_code range="1:" /> -</stdio> -<command interpreter="Rscript"> - $__tool_directory__/RetrieveFromHPA.R - --data_source="$input.data_source" - #if $input.data_source == "immuno" - --hpa_ref="$__tool_directory__/normal_tissue.tsv" - --tissue="$input.normal_tissue" - --level="$input.level" - --reliability="$input.reliability" - #else if $input.data_source == "rnaseq" - --hpa_ref="$__tool_directory__/rna_tissue.tsv" - --sample="$input.sample" - #end if - --output="$output" -</command> - -<inputs> -<conditional name="input"> - <param name="data_source" type="select" label="Please choose experimental data source (antibody- or RNAseq-based)"> - <option value="immuno">Expression profiles based on immunohistochemistry</option> - <option value="rnaseq">RNA levels based on RNA-seq</option> - </param> - <when value="immuno"> - <param name="normal_tissue" type="select" label="Select tissue by clicking the dropdown menu below" multiple="True" optional="False"> - <option value="adrenal gland" >Adrenal gland</option> - <option value="appendix" >Appendix</option> - <option value="bone marrow" >Bone marrow</option> - <option value="breast" >Breast</option> - <option value="bronchus" >Bronchus</option> - <option value="caudate" >Caudate</option> - <option value="cerebellum" >Cerebellum</option> - <option value="cerebral cortex" >Cerebral cortex</option> - <option value="cervix" >Cervix</option> - <option value="colon" >Colon</option> - <option value="duodenum" >Duodenum</option> - <option value="endometrium 1" >Endometrium 1</option> - <option value="endometrium 2" >Endometrium 2</option> - 
<option value="epididymis" >Epididymis</option> - <option value="esophagus" >Esophagus</option> - <option value="fallopian tube" >Fallopian tube</option> - <option value="gallbladder" >Gallbladder</option> - <option value="heart muscle" >Heart muscle</option> - <option value="hippocampus" >Hippocampus</option> - <option value="kidney" >Kidney</option> - <option value="liver" >Liver</option> - <option value="lung" >Lung</option> - <option value="lymph node" >Lymph node</option> - <option value="nasopharynx" >Nasopharynx</option> - <option value="oral mucosa" >Oral mucosa</option> - <option value="ovary" >Ovary</option> - <option value="pancreas" >Pancreas</option> - <option value="parathyroid gland" >Parathyroid gland</option> - <option value="placenta" >Placenta</option> - <option value="prostate" >Prostate</option> - <option value="rectum" >Rectum</option> - <option value="salivary gland" >Salivary gland</option> - <option value="seminal vesicle" >Seminal vesicle</option> - <option value="skeletal muscle" >Skeletal muscle</option> - <option value="skin 1" >Skin 1</option> - <option value="skin 2" >Skin 2</option> - <option value="small intestine" >Small intestine</option> - <option value="smooth muscle" >Smooth muscle</option> - <option value="soft tissue 1" >Soft tissue 1</option> - <option value="soft tissue 2" >Soft tissue 2</option> - <option value="spleen" >Spleen</option> - <option value="stomach 1" >Stomach 1</option> - <option value="stomach 2" >Stomach 2</option> - <option value="testis" >Testis</option> - <option value="thyroid gland" >Thyroid gland</option> - <option value="tonsil" >Tonsil</option> - <option value="urinary bladder" >Urinary bladder</option> - <option value="vagina" >Vagina</option> - </param> - <param name="level" type="select" label="Expression level" display="checkboxes" multiple="True" optional="False"> - <option value="High" selected="true">High</option> - <option value="Medium">Medium</option> - <option value="Low">Low</option> - 
<option value="Not detected">Not detected</option> - </param> - <param name="reliability" type="select" label="Reliability score" display="checkboxes" multiple="True" optional="False"> - <option value="Enhanced" selected="true">Enhanced</option> - <option value="Supported" selected="true">Supported</option> - <option value="Approved">Approved</option> - <option value="Uncertain">Uncertain</option> - - </param> - </when> - <when value="rnaseq"> - <param name="sample" type="select" label="Keep and annotate genes present in the following tissue(s)" multiple="True" optional="False"> - <option value="adipose tissue">Adipose tissue</option> - <option value="adrenal gland">Adrenal gland</option> - <option value="appendix">Appendix</option> - <option value="bone marrow">Bone marrow</option> - <option value="breast">Breast</option> - <option value="cerebral cortex">Cerebral cortex</option> - <option value="cervix, uterine">Cervix, uterine</option> - <option value="colon">Colon</option> - <option value="duodenum">Duodenum</option> - <option value="endometrium">Endometrium</option> - <option value="epididymis">Epididymis</option> - <option value="esophagus">Esophagus</option> - <option value="fallopian tube">Fallopian tube</option> - <option value="gallbladder">Gallbladder</option> - <option value="heart muscle">Heart muscle</option> - <option value="kidney">Kidney</option> - <option value="liver">Liver</option> - <option value="lung">Lung</option> - <option value="lymph node">Lymph node</option> - <option value="ovary">Ovary</option> - <option value="pancreas">Pancreas</option> - <option value="parathyroid gland">Parathyroid gland</option> - <option value="placenta">Placenta</option> - <option value="prostate">Prostate</option> - <option value="rectum">Rectum</option> - <option value="salivary gland">Salivary gland</option> - <option value="seminal vesicle">Seminal vesicle</option> - <option value="skeletal muscle">Skeletal muscle</option> - <option 
value="skin">Skin</option> - <option value="small intestine">Small intestine</option> - <option value="smooth muscle">Smooth muscle</option> - <option value="spleen">Spleen</option> - <option value="stomach">Stomach</option> - <option value="testis">Testis</option> - <option value="thyroid gland">Thyroid gland</option> - <option value="tonsil">Tonsil</option> - <option value="rinary bladder">Urinary bladder</option> - </param> - </when> -</conditional> -</inputs> - -<outputs> - <data name="output" format="tsv" label=""/> -</outputs> - -<tests> - <test> - <conditional name="input"> - <param name="data_source" value="immuno"/> - <param name="normal_tissue" value="lung,ovary"/> - <param name="level" value="High"/> - <param name="reliability" value="Approved,Supported"/> - </conditional> - <output name="output" file="test.txt"/> - </test> -</tests> - -<help><![CDATA[ - -This tool allows to retrieve information from Human Protein Atlas (https://www.proteinatlas.org/) -regarding the expression profiles of human genes both at the mRNA and protein levels -without any input required. It could be used to: - -* Select a list of proteins highly expressed in a given tissue for data mining or comparison purpose in terms of expected protein content from one tissue to another - -* Build a "reference" proteome restricted to a given tissue (for instance for GO analysis as a reference background restricted to a biological sample) - -The resources from Human Protein Atlas that can be queried are the following: - -1. Human normal tissue data: expression profiles for proteins in human tissues based on immunohistochemisty using tissue micro arrays measured in 58 tissues and 82 cell types. - - The tab-separated file includes Ensembl gene identifier ("Gene"), tissue name ("Tissue"), annotated cell type ("Cell type"), expression value ("Level"), and the gene reliability of the expression value ("Reliability score"). 
- - The reliability score is divided into Enhanced, Supported, Approved, or Uncertain with the following definitions: - - * Enhanced: One or several antibodies with non-overlapping epitopes targeting the same gene have obtained enhanced validation based on orthogonal or independent antibody validation method. - - * Supported: Consistency with RNA-seq and/or protein/gene characterization data, in combination with similar staining pattern if independent antibodies are available. - - * Approved: Consistency with RNA-seq data in combination with inconsistency with, or lack of, protein/gene characterization data. Alternatively, consistency with protein/gene characterization data in combination with inconsistency with RNA-seq data. If independent antibodies are available, the staining pattern is partly similar or dissimilar. - - * Uncertain: - - Inconsistency with, or lack of, RNA-seq and/or protein/gene characterization data, in combination with dissimilar staining pattern if independent antibodies are available. - - The data is based on The Human Protein Atlas version 18 and Ensembl version 88.38. - -2. RNA levels based on RNA-seq data: RNA levels measured in 64 cell lines and 37 tissues based on RNA-seq experiments. The tab-separated file includes Ensembl gene identifier ("Gene"), analysed sample ("Sample") and transcripts per million ("Value" and "Unit"). - - The data is based on The Human Protein Atlas version 18 and Ensembl version 88.38. - - For more information: https://www.proteinatlas.org/about/help - ------ - -.. class:: infomark - -**Authors** - -T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR - -Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform - -This work has been partially funded through the French National Agency for Research (ANR) IFB project. 
- -Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool. - -]]></help> - -<citations> -</citations> - -</tool>
