# HG changeset patch
# User proteore
# Date 1551881945 18000
# Node ID 3bedd074c48595067b583615d1a4bf4a175f0352
# Parent db2cd451835ffcc0e8e41bc600dfdf41ef7273de
planemo upload commit c6e1fd1f68e81ce9eea6ad66adee21070f2893ef-dirty
diff -r db2cd451835f -r 3bedd074c485 README.rst
--- a/README.rst Thu Dec 06 10:07:13 2018 -0500
+++ b/README.rst Wed Mar 06 09:19:05 2019 -0500
@@ -1,9 +1,9 @@
-Wrapper for Get expression data by tissue Tool
+Wrapper for Get expression profiles by tissue Tool
=================================================
**Authors**
-T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+David Christiany, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform
diff -r db2cd451835f -r 3bedd074c485 get_expression_profiles.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_expression_profiles.R Wed Mar 06 09:19:05 2019 -0500
@@ -0,0 +1,146 @@
+# Read a tab-separated file into a data.frame (strings stay character, column names are kept verbatim).
+# Stops with the path and the underlying read error, so a malformed file is not reported as a missing one.
+read_file <- function(path,header){
+  file <- tryCatch(
+    read.csv(path, header = header, sep = "\t", stringsAsFactors = FALSE, quote = "\"", check.names = FALSE),
+    error = function(e) stop("Cannot read file '", path, "': ", conditionMessage(e), call. = FALSE)
+  )
+  return(file)
+}
+
+# Convert a textual flag to a logical value.
+# "t"/"true" give TRUE and "f"/"false" give FALSE, ignoring case; the TRUE spellings are checked first.
+# Any other value returns NULL, so callers have to handle a missing result.
+str2bool <- function(x){
+  lowered <- tolower(x)
+  if (any(c("t","true") %in% lowered)) return(TRUE)
+  if (any(c("f","false") %in% lowered)) return(FALSE)
+  NULL
+}
+
+# Select the Human Protein Atlas normal-tissue rows matching a list of gene IDs and filters.
+# input: vector of gene IDs compared with the Gene column (ENSG format expected)
+# HPA_normal_tissue: reference data.frame; its Gene, Tissue, Level and Reliability columns are used
+# tissue, level, reliability: values to keep in the Tissue, Level and Reliability columns
+# not_mapped_option: if TRUE, append one placeholder row per input ID that yielded no row
+# Returns the selected rows, followed by the placeholder rows when requested.
+annot.HPAnorm<-function(input, HPA_normal_tissue, tissue, level, reliability, not_mapped_option) {
+  # One placeholder row per ID: the ID in the first column, 'fill' in every other column
+  placeholder_rows <- function(ids, fill) {
+    rows <- cbind(matrix(ids, ncol = 1), matrix(fill, nrow = length(ids), ncol = ncol(HPA_normal_tissue) - 1))
+    colnames(rows) <- colnames(HPA_normal_tissue)
+    rows
+  }
+
+  dat <- subset(HPA_normal_tissue, Gene %in% input)
+  res <- subset(dat, Tissue %in% tissue & Level %in% level & Reliability %in% reliability)
+
+  if (not_mapped_option) {
+    known_ids <- unique(dat$Gene)
+
+    # IDs found in the reference but removed by the tissue/level/reliability filters
+    filtered_out <- setdiff(intersect(input, known_ids), unique(res$Gene))
+    if (length(filtered_out) > 0) {
+      res <- rbind(res, placeholder_rows(filtered_out, "no match"))
+    }
+
+    # IDs absent from the reference: every column but the first is left as NA
+    not_found <- setdiff(input, known_ids)
+    if (length(not_found) > 0) {
+      res <- rbind(res, placeholder_rows(not_found, NA))
+    }
+  }
+
+  return(res)
+}
+
+# Select the Human Protein Atlas pathology rows matching a list of gene IDs and cancer types.
+# If not_mapped_option is TRUE, one row per input ID absent from the Gene column is appended:
+# the ID goes in the first column and every other column is NA.
+annot.HPAcancer<-function(input, HPA_cancer_tissue, cancer, not_mapped_option) {
+  dat <- subset(HPA_cancer_tissue, Gene %in% input)
+  res <- subset(dat, Cancer %in% cancer)
+  if (not_mapped_option) {
+    not_found <- setdiff(input, unique(dat$Gene))
+    not.mapped <- cbind(matrix(not_found, ncol = 1), matrix(nrow = length(not_found), ncol = ncol(HPA_cancer_tissue) - 1))
+    colnames(not.mapped) <- colnames(HPA_cancer_tissue)
+    res <- rbind(res, not.mapped)
+  }
+  return(res)
+}
+
+
+# Entry point: parse the --key=value arguments, select the matching HPA rows and write them to --output as TSV.
+main <- function() {
+  args <- commandArgs(TRUE)
+  if(length(args)<1) {
+    args <- c("--help")
+  }
+  # Help section
+  if("--help" %in% args) {
+    cat("Selection and Annotation HPA
+ Arguments:
+ --ref_file: HPA normal/cancer tissue file path
+ --input_type: type of input (list of id or filename)
+ --input: list of IDs in ENSG format
+ --column_number: the column number which you would like to apply...
+ --header: true/false if your file contains a header
+ --atlas: normal/cancer
+ if normal:
+ --tissue: list of tissues
+ --level: Not detected, Low, Medium, High
+ --reliability: Supportive, Uncertain
+ if cancer:
+ --cancer: Cancer tissues
+ --not_mapped: true/false if your output file should contain not-mapped and not-match IDs
+ --output: output filename \n")
+    q(save="no")
+  }
+
+  # Parse arguments, splitting on the first "=" only: values may contain "=" and empty values stay empty
+  stripped <- sub("^--", "", args)
+  eq_pos <- regexpr("=", stripped, fixed = TRUE)
+  if (any(eq_pos < 1)) {
+    stop("Arguments must be given as --key=value: ", paste(args[eq_pos < 1], collapse = " "), call. = FALSE)
+  }
+  args <- as.list(substring(stripped, eq_pos + 1))
+  names(args) <- substring(stripped, 1, eq_pos - 1)
+
+  # Extract input
+  input_type <- args$input_type
+  if (identical(input_type, "list")) {
+    input <- strsplit(args$input, "[ \t\n]+")[[1]]
+  } else if (identical(input_type, "file")) {
+    column_number <- as.numeric(gsub("c", "", args$column_number))
+    stopifnot(length(column_number) == 1, !is.na(column_number))
+    file <- read_file(args$input, str2bool(args$header))
+    # Keep the first ID of each ";"-separated cell; as.character() guards against a non-text column
+    input <- vapply(strsplit(as.character(file[, column_number]), ";"), "[", character(1), 1)
+  } else {
+    stop("--input_type must be 'list' or 'file'", call. = FALSE)
+  }
+
+  # Read reference file
+  reference_file <- read_file(args$ref_file, TRUE)
+  # Extract other options
+  not_mapped_option <- str2bool(args$not_mapped)
+  if (is.null(not_mapped_option)) {
+    stop("--not_mapped must be 'true' or 'false'", call. = FALSE)
+  }
+  if (identical(args$atlas, "normal")) {
+    tissue <- strsplit(args$tissue, ",")[[1]]
+    level <- strsplit(args$level, ",")[[1]]
+    reliability <- strsplit(args$reliability, ",")[[1]]
+    res <- annot.HPAnorm(input, reference_file, tissue, level, reliability, not_mapped_option)
+  } else if (identical(args$atlas, "cancer")) {
+    res <- annot.HPAcancer(input, reference_file, strsplit(args$cancer, ",")[[1]], not_mapped_option)
+  } else {
+    stop("--atlas must be 'normal' or 'cancer'", call. = FALSE)
+  }
+
+  # Write output: cells that are empty or a single space are replaced by NA before writing
+  res <- apply(res, c(1,2), function(x) gsub("^$|^ $", NA, x))
+  write.table(res, args$output, sep = "\t", quote = FALSE, row.names = FALSE)
+}
+
+main() # run the tool as soon as the script is executed
diff -r db2cd451835f -r 3bedd074c485 get_expression_profiles.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_expression_profiles.xml Wed Mar 06 09:19:05 2019 -0500
@@ -0,0 +1,254 @@
+
+ by (normal or tumor) tissue/cell type [Human Protein Atlas]
+
+
+
+
+
+
+
+ $__tool_directory__/get_expression_profiles.R
+
+ #if "protein_atlas" in str($options.proteinatlas).split("/")
+ --ref_file="$options.proteinatlas"
+ #else
+ --ref_file="$__tool_directory__/$options.proteinatlas"
+ #end if
+
+ --input_type="$input.ids"
+ #if $input.ids == "list"
+ --input="$input.list"
+ #else
+ --input="$input.file"
+ --column_number="$input.ncol"
+ --header="$input.header"
+ #end if
+
+ #if $options.database == "normal"
+ --atlas="normal"
+ --tissue="$options.normal_tissue"
+ --level="$options.level"
+ --reliability="$options.reliability"
+ #else if $options.database == "tumor"
+ --atlas="cancer"
+ --cancer="$options.cancer_tissue"
+ #end if
+ --not_mapped="$not_mapped"
+ --output="$hpa_output"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ [c]{0,1}[0-9]+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r db2cd451835f -r 3bedd074c485 proteore_protein_atlas_normal_tissue.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/proteore_protein_atlas_normal_tissue.loc.sample Wed Mar 06 09:19:05 2019 -0500
@@ -0,0 +1,2 @@
+#
+HPA_normal_tissue_19-07-2018 HPA normal tissue 19/07/2018 HPA_normal_tissue /tool-data/HPA_normal_tissue_23-10-2018.tsv
diff -r db2cd451835f -r 3bedd074c485 proteore_protein_atlas_tumor_tissue.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/proteore_protein_atlas_tumor_tissue.loc.sample Wed Mar 06 09:19:05 2019 -0500
@@ -0,0 +1,2 @@
+#
+HPA_pathology_19-07-2018 HPA pathology 19/07/2018 HPA_pathology /tool-data/HPA_pathology_23-10-2018.tsv
diff -r db2cd451835f -r 3bedd074c485 sel_ann_hpa.R
--- a/sel_ann_hpa.R Thu Dec 06 10:07:13 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,146 +0,0 @@
-# Read file and return file content as data.frame
-read_file <- function(path,header){
- file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE)
- if (inherits(file,"try-error")){
- stop("File not found !")
- }else{
- return(file)
- }
-}
-
-str2bool <- function(x){
- if (any(is.element(c("t","true"),tolower(x)))){
- return (TRUE)
- }else if (any(is.element(c("f","false"),tolower(x)))){
- return (FALSE)
- }else{
- return(NULL)
- }
-}
-
-# input has to be a list of IDs in ENSG format
-# tissue is one of unique(HPA.normal.tissue$Tissue)
-# level is one, or several, or 0 (=ALL) of "Not detected", "Medium", "High", "Low"
-# reliability is one, or several, or 0 (=ALL) of "Approved", "Supported", "Uncertain"
-annot.HPAnorm<-function(input, HPA_normal_tissue, tissue, level, reliability, not_mapped_option) {
- dat <- subset(HPA_normal_tissue, Gene %in% input)
- res.Tissue<-subset(dat, Tissue %in% tissue)
- res.Level<-subset(res.Tissue, Level %in% level)
- res.Rel<-subset(res.Level, Reliability %in% reliability)
-
- if (not_mapped_option) {
- if (length(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)))>0) {
- not_match_IDs <- matrix(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)), ncol = 1, nrow = length(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene))))
- not.match <- matrix("not match", ncol = ncol(HPA_normal_tissue) - 1, nrow = length(not_match_IDs))
- not.match <- cbind(not_match_IDs, unname(not.match))
- colnames(not.match) <- colnames(HPA_normal_tissue)
- res <- rbind(res.Rel, not.match)
- } else {
- res <- res.Rel
- }
-
- if (length(setdiff(input, unique(dat$Gene)))>0) {
- not.mapped <- matrix(ncol = ncol(HPA_normal_tissue) - 1, nrow = length(setdiff(input, unique(dat$Gene))))
- not.mapped <- cbind(matrix(setdiff(input, unique(dat$Gene)), ncol = 1, nrow = length(setdiff(input, unique(dat$Gene)))), unname(not.mapped))
- colnames(not.mapped) <- colnames(HPA_normal_tissue)
- res <- rbind(res, not.mapped)
- }
-
- } else {
- res <- res.Rel
- }
-
- return(res)
-
-}
-
-annot.HPAcancer<-function(input, HPA_cancer_tissue, cancer, not_mapped_option) {
- dat <- subset(HPA_cancer_tissue, Gene %in% input)
- res.Cancer<-subset(dat, Cancer %in% cancer)
-
- if (not_mapped_option) {
- not.mapped <- matrix(ncol=ncol(HPA_cancer_tissue)-1, nrow=length(setdiff(input, unique(dat$Gene))))
- not.mapped <- cbind(matrix(setdiff(input, unique(dat$Gene)), ncol = 1, nrow = length(setdiff(input, unique(dat$Gene)))), unname(not.mapped))
- colnames(not.mapped) <- colnames(HPA_cancer_tissue)
- res <- rbind(res.Cancer, not.mapped)
- } else {
- res <- res.Cancer
- }
- return(res)
-}
-
-
-main <- function() {
- args <- commandArgs(TRUE)
- if(length(args)<1) {
- args <- c("--help")
- }
-
- # Help section
- if("--help" %in% args) {
- cat("Selection and Annotation HPA
- Arguments:
- --ref_file: HPA normal/cancer tissue file path
- --input_type: type of input (list of id or filename)
- --input: list of IDs in ENSG format
- --column_number: the column number which you would like to apply...
- --header: true/false if your file contains a header
- --atlas: normal/cancer
- if normal:
- --tissue: list of tissues
- --level: Not detected, Low, Medium, High
- --reliability: Supportive, Uncertain
- if cancer:
- --cancer: Cancer tissues
- --not_mapped: true/false if your output file should contain not-mapped and not-match IDs
- --output: output filename \n")
- q(save="no")
- }
-
- # Parse arguments
- parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
- argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
- args <- as.list(as.character(argsDF$V2))
- names(args) <- argsDF$V1
-
- #save(args,file = "/home/dchristiany/proteore_project/ProteoRE/tools/select_annotate_tissue/args.rda")
- #load("/home/dchristiany/proteore_project/ProteoRE/tools/select_annotate_tissue/args.rda")
-
- # Extract input
- input_type = args$input_type
- if (input_type == "list") {
- list_id = strsplit(args$input, "[ \t\n]+")[[1]]
- } else if (input_type == "file") {
- filename = args$input
- column_number = as.numeric(gsub("c", "" ,args$column_number))
- header = str2bool(args$header)
- file = read_file(filename, header)
- list_id = sapply(strsplit(file[,column_number], ";"), "[", 1)
- }
- input = list_id
-
- # Read reference file
- reference_file = read_file(args$ref_file, TRUE)
-
- # Extract other options
- atlas = args$atlas
- not_mapped_option = str2bool(args$not_mapped)
- if (atlas=="normal") {
- tissue = strsplit(args$tissue, ",")[[1]]
- level = strsplit(args$level, ",")[[1]]
- reliability = strsplit(args$reliability, ",")[[1]]
- # Calculation
- res = annot.HPAnorm(input, reference_file, tissue, level, reliability, not_mapped_option)
- } else if (atlas=="cancer") {
- cancer = strsplit(args$cancer, ",")[[1]]
- # Calculation
- res = annot.HPAcancer(input, reference_file, cancer, not_mapped_option)
- }
-
- # Write output
- output = args$output
- res <- apply(res, c(1,2), function(x) gsub("^$|^ $", NA, x))
- write.table(res, output, sep = "\t", quote = FALSE, row.names = FALSE)
-}
-
-main()
diff -r db2cd451835f -r 3bedd074c485 sel_ann_hpa.xml
--- a/sel_ann_hpa.xml Thu Dec 06 10:07:13 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,252 +0,0 @@
-
- by tissue (normal or tumor tissue Human Protein Atlas)
-
-
-
-
-
-
-
- $__tool_directory__/sel_ann_hpa.R
- --ref_file="$__tool_directory__/$options.proteinatlas"
- --input_type="$input.ids"
- #if $input.ids == "list"
- --input="$input.list"
- #else
- --input="$input.file"
- --column_number="$input.ncol"
- --header="$input.header"
- #end if
-
- #if $options.database == "normal"
- --atlas="normal"
- --tissue="$options.normal_tissue"
- --level="$options.level"
- --reliability="$options.reliability"
- #else if $options.database == "tumor"
- --atlas="cancer"
- --cancer="$options.cancer_tissue"
- #end if
- --not_mapped="$not_mapped"
- --output="$hpa_output"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r db2cd451835f -r 3bedd074c485 tool-data/proteinatlas.loc.sample
--- a/tool-data/proteinatlas.loc.sample Thu Dec 06 10:07:13 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-#This file lists the locations and dbkeys of Human Protein Atlas files.
-#
-#full Full Human Protein Atlas (23/10/18) tool-data/HPA_full_atlas_23-10-2018.tsv
-normal_tissue HPA Normal Tissue (23/10/18) tool-data/HPA_normal_tissue_23-10-2018.tsv
-pathology HPA Tumor Tissue (23/10/18) tool-data/HPA_pathology_23-10-2018.tsv
diff -r db2cd451835f -r 3bedd074c485 tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample Thu Dec 06 10:07:13 2018 -0500
+++ b/tool_data_table_conf.xml.sample Wed Mar 06 09:19:05 2019 -0500
@@ -1,7 +1,10 @@
-
-
- name, value, path
-
+
+ id, name, tissue, value
+
+
+
+ id, name, tissue, value
+
\ No newline at end of file