# HG changeset patch # User proteore # Date 1551881945 18000 # Node ID 3bedd074c48595067b583615d1a4bf4a175f0352 # Parent db2cd451835ffcc0e8e41bc600dfdf41ef7273de planemo upload commit c6e1fd1f68e81ce9eea6ad66adee21070f2893ef-dirty diff -r db2cd451835f -r 3bedd074c485 README.rst --- a/README.rst Thu Dec 06 10:07:13 2018 -0500 +++ b/README.rst Wed Mar 06 09:19:05 2019 -0500 @@ -1,9 +1,9 @@ -Wrapper for Get expression data by tissue Tool +Wrapper for Get expression profiles by tissue Tool ================================================= **Authors** -T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR +David Christiany, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform diff -r db2cd451835f -r 3bedd074c485 get_expression_profiles.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/get_expression_profiles.R Wed Mar 06 09:19:05 2019 -0500 @@ -0,0 +1,146 @@ +# Read file and return file content as data.frame +read_file <- function(path,header){ + file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE) + if (inherits(file,"try-error")){ + stop("File not found !") + }else{ + return(file) + } +} + +str2bool <- function(x){ + if (any(is.element(c("t","true"),tolower(x)))){ + return (TRUE) + }else if (any(is.element(c("f","false"),tolower(x)))){ + return (FALSE) + }else{ + return(NULL) + } +} + +# input has to be a list of IDs in ENSG format +# tissue is one of unique(HPA.normal.tissue$Tissue) +# level is one, or several, or 0 (=ALL) of "Not detected", "Medium", "High", "Low" +# reliability is one, or several, or 0 (=ALL) of "Approved", "Supported", "Uncertain" +annot.HPAnorm<-function(input, HPA_normal_tissue, tissue, level, reliability, not_mapped_option) { + dat <- subset(HPA_normal_tissue, Gene %in% input) + res.Tissue<-subset(dat, Tissue %in% tissue) + res.Level<-subset(res.Tissue, Level %in% level) + res.Rel<-subset(res.Level, Reliability %in% reliability) + + if (not_mapped_option) { + if (length(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)))>0) { + not_match_IDs <- matrix(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)), ncol = 1, nrow = length(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)))) + not.match <- matrix("no match", ncol = ncol(HPA_normal_tissue) - 1, nrow = length(not_match_IDs)) + not.match <- cbind(not_match_IDs, unname(not.match)) + colnames(not.match) <- colnames(HPA_normal_tissue) + res <- rbind(res.Rel, not.match) + } else { + res <- res.Rel + } + + if (length(setdiff(input, unique(dat$Gene)))>0) { + not.mapped <- matrix(ncol = ncol(HPA_normal_tissue) - 1, nrow = length(setdiff(input, unique(dat$Gene)))) + not.mapped <- cbind(matrix(setdiff(input, unique(dat$Gene)), ncol = 1, nrow = length(setdiff(input, unique(dat$Gene)))), unname(not.mapped)) + colnames(not.mapped) <- colnames(HPA_normal_tissue) + res <- rbind(res, not.mapped) + } + + } else { + res <- res.Rel + } + + return(res) + +} + +annot.HPAcancer<-function(input, HPA_cancer_tissue, cancer, not_mapped_option) { + dat <- subset(HPA_cancer_tissue, Gene %in% input) + res.Cancer<-subset(dat, Cancer %in% cancer) + + if (not_mapped_option) { + not.mapped <- matrix(ncol=ncol(HPA_cancer_tissue)-1, nrow=length(setdiff(input, unique(dat$Gene)))) + not.mapped <- cbind(matrix(setdiff(input, unique(dat$Gene)), ncol = 1, nrow = length(setdiff(input, unique(dat$Gene)))), unname(not.mapped)) + colnames(not.mapped) <- colnames(HPA_cancer_tissue) + res <- rbind(res.Cancer, not.mapped) + } else { + res <- res.Cancer + } + return(res) +} + + +main <- function() { + args <- commandArgs(TRUE) + if(length(args)<1) { + args <- c("--help") + } + + # Help section + if("--help" %in% args) { + cat("Selection and Annotation HPA + Arguments: + --ref_file: HPA normal/cancer tissue file path + --input_type: type of input (list of id or filename) + --input: list of IDs in ENSG format + --column_number: the column number which you would like to apply... + --header: true/false if your file contains a header + --atlas: normal/cancer + if normal: + --tissue: list of tissues + --level: Not detected, Low, Medium, High + --reliability: Supportive, Uncertain + if cancer: + --cancer: Cancer tissues + --not_mapped: true/false if your output file should contain not-mapped and not-match IDs + --output: output filename \n") + q(save="no") + } + + # Parse arguments + parseArgs <- function(x) strsplit(sub("^--", "", x), "=") + argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) + args <- as.list(as.character(argsDF$V2)) + names(args) <- argsDF$V1 + + #save(args,file = "/home/dchristiany/proteore_project/ProteoRE/tools/Get_expression_profiles/args.rda") + #load("/home/dchristiany/proteore_project/ProteoRE/tools/Get_expression_profiles/args.rda") + + # Extract input + input_type = args$input_type + if (input_type == "list") { + list_id = strsplit(args$input, "[ \t\n]+")[[1]] + } else if (input_type == "file") { + filename = args$input + column_number = as.numeric(gsub("c", "" ,args$column_number)) + header = str2bool(args$header) + file = read_file(filename, header) + list_id = sapply(strsplit(file[,column_number], ";"), "[", 1) + } + input = list_id + + # Read reference file + reference_file = read_file(args$ref_file, TRUE) + + # Extract other options + atlas = args$atlas + not_mapped_option = str2bool(args$not_mapped) + if (atlas=="normal") { + tissue = strsplit(args$tissue, ",")[[1]] + level = strsplit(args$level, ",")[[1]] + reliability = strsplit(args$reliability, ",")[[1]] + # Calculation + res = annot.HPAnorm(input, reference_file, tissue, level, reliability, not_mapped_option) + } else if (atlas=="cancer") { + cancer = strsplit(args$cancer, ",")[[1]] + # Calculation + res = annot.HPAcancer(input, reference_file, cancer, not_mapped_option) + } + + # Write output + output = args$output + res <- apply(res, c(1,2), function(x) gsub("^$|^ $", NA, x)) + write.table(res, output, sep = "\t", quote = FALSE, row.names = FALSE) +} + +main() diff -r db2cd451835f -r 3bedd074c485 get_expression_profiles.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/get_expression_profiles.xml Wed Mar 06 09:19:05 2019 -0500 @@ -0,0 +1,254 @@ + + by (normal or tumor) tissue/cell type [Human Protein Atlas] + + + + + + + + $__tool_directory__/get_expression_profiles.R + + #if "protein_atlas" in str($options.proteinatlas).split("/") + --ref_file="$options.proteinatlas" + #else + --ref_file="$__tool_directory__/$options.proteinatlas" + #end if + + --input_type="$input.ids" + #if $input.ids == "list" + --input="$input.list" + #else + --input="$input.file" + --column_number="$input.ncol" + --header="$input.header" + #end if + + #if $options.database == "normal" + --atlas="normal" + --tissue="$options.normal_tissue" + --level="$options.level" + --reliability="$options.reliability" + #else if $options.database == "tumor" + --atlas="cancer" + --cancer="$options.cancer_tissue" + #end if + --not_mapped="$not_mapped" + --output="$hpa_output" + + + + + + + + + + + + + + + + + + + + + + + + [c]{0,1}[0-9]+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r db2cd451835f -r 3bedd074c485 proteore_protein_atlas_normal_tissue.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/proteore_protein_atlas_normal_tissue.loc.sample Wed Mar 06 09:19:05 2019 -0500 @@ -0,0 +1,2 @@ +# +HPA_normal_tissue_19-07-2018 HPA normal tissue 19/07/2018 HPA_normal_tissue /tool-data/HPA_normal_tissue_23-10-2018.tsv diff -r db2cd451835f -r 3bedd074c485 proteore_protein_atlas_tumor_tissue.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/proteore_protein_atlas_tumor_tissue.loc.sample Wed Mar 06 09:19:05 2019 -0500 @@ -0,0 +1,2 @@ +# +HPA_pathology_19-07-2018 HPA pathology 19/07/2018 HPA_pathology /tool-data/HPA_pathology_23-10-2018.tsv diff -r db2cd451835f -r 3bedd074c485 sel_ann_hpa.R --- a/sel_ann_hpa.R Thu Dec 06 10:07:13 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,146 +0,0 @@ -# Read file and return file content as data.frame -read_file <- function(path,header){ - file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE) - if (inherits(file,"try-error")){ - stop("File not found !") - }else{ - return(file) - } -} - -str2bool <- function(x){ - if (any(is.element(c("t","true"),tolower(x)))){ - return (TRUE) - }else if (any(is.element(c("f","false"),tolower(x)))){ - return (FALSE) - }else{ - return(NULL) - } -} - -# input has to be a list of IDs in ENSG format -# tissue is one of unique(HPA.normal.tissue$Tissue) -# level is one, or several, or 0 (=ALL) of "Not detected", "Medium", "High", "Low" -# reliability is one, or several, or 0 (=ALL) of "Approved", "Supported", "Uncertain" -annot.HPAnorm<-function(input, HPA_normal_tissue, tissue, level, reliability, not_mapped_option) { - dat <- subset(HPA_normal_tissue, Gene %in% input) - res.Tissue<-subset(dat, Tissue %in% tissue) - res.Level<-subset(res.Tissue, Level %in% level) - res.Rel<-subset(res.Level, Reliability %in% reliability) - - if (not_mapped_option) { - if (length(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)))>0) { - not_match_IDs <- matrix(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)), ncol = 1, nrow = length(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)))) - not.match <- matrix("not match", ncol = ncol(HPA_normal_tissue) - 1, nrow = length(not_match_IDs)) - not.match <- cbind(not_match_IDs, unname(not.match)) - colnames(not.match) <- colnames(HPA_normal_tissue) - res <- rbind(res.Rel, not.match) - } else { - res <- res.Rel - } - - if (length(setdiff(input, unique(dat$Gene)))>0) { - not.mapped <- matrix(ncol = ncol(HPA_normal_tissue) - 1, nrow = length(setdiff(input, unique(dat$Gene)))) - not.mapped <- cbind(matrix(setdiff(input, unique(dat$Gene)), ncol = 1, nrow = length(setdiff(input, unique(dat$Gene)))), unname(not.mapped)) - colnames(not.mapped) <- colnames(HPA_normal_tissue) - res <- rbind(res, not.mapped) - } - - } else { - res <- res.Rel - } - - return(res) - -} - -annot.HPAcancer<-function(input, HPA_cancer_tissue, cancer, not_mapped_option) { - dat <- subset(HPA_cancer_tissue, Gene %in% input) - res.Cancer<-subset(dat, Cancer %in% cancer) - - if (not_mapped_option) { - not.mapped <- matrix(ncol=ncol(HPA_cancer_tissue)-1, nrow=length(setdiff(input, unique(dat$Gene)))) - not.mapped <- cbind(matrix(setdiff(input, unique(dat$Gene)), ncol = 1, nrow = length(setdiff(input, unique(dat$Gene)))), unname(not.mapped)) - colnames(not.mapped) <- colnames(HPA_cancer_tissue) - res <- rbind(res.Cancer, not.mapped) - } else { - res <- res.Cancer - } - return(res) -} - - -main <- function() { - args <- commandArgs(TRUE) - if(length(args)<1) { - args <- c("--help") - } - - # Help section - if("--help" %in% args) { - cat("Selection and Annotation HPA - Arguments: - --ref_file: HPA normal/cancer tissue file path - --input_type: type of input (list of id or filename) - --input: list of IDs in ENSG format - --column_number: the column number which you would like to apply... - --header: true/false if your file contains a header - --atlas: normal/cancer - if normal: - --tissue: list of tissues - --level: Not detected, Low, Medium, High - --reliability: Supportive, Uncertain - if cancer: - --cancer: Cancer tissues - --not_mapped: true/false if your output file should contain not-mapped and not-match IDs - --output: output filename \n") - q(save="no") - } - - # Parse arguments - parseArgs <- function(x) strsplit(sub("^--", "", x), "=") - argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) - args <- as.list(as.character(argsDF$V2)) - names(args) <- argsDF$V1 - - #save(args,file = "/home/dchristiany/proteore_project/ProteoRE/tools/select_annotate_tissue/args.rda") - #load("/home/dchristiany/proteore_project/ProteoRE/tools/select_annotate_tissue/args.rda") - - # Extract input - input_type = args$input_type - if (input_type == "list") { - list_id = strsplit(args$input, "[ \t\n]+")[[1]] - } else if (input_type == "file") { - filename = args$input - column_number = as.numeric(gsub("c", "" ,args$column_number)) - header = str2bool(args$header) - file = read_file(filename, header) - list_id = sapply(strsplit(file[,column_number], ";"), "[", 1) - } - input = list_id - - # Read reference file - reference_file = read_file(args$ref_file, TRUE) - - # Extract other options - atlas = args$atlas - not_mapped_option = str2bool(args$not_mapped) - if (atlas=="normal") { - tissue = strsplit(args$tissue, ",")[[1]] - level = strsplit(args$level, ",")[[1]] - reliability = strsplit(args$reliability, ",")[[1]] - # Calculation - res = annot.HPAnorm(input, reference_file, tissue, level, reliability, not_mapped_option) - } else if (atlas=="cancer") { - cancer = strsplit(args$cancer, ",")[[1]] - # Calculation - res = annot.HPAcancer(input, reference_file, cancer, not_mapped_option) - } - - # Write output - output = args$output - res <- apply(res, c(1,2), function(x) gsub("^$|^ $", NA, x)) - write.table(res, output, sep = "\t", quote = FALSE, row.names = FALSE) -} - -main() diff -r db2cd451835f -r 3bedd074c485 sel_ann_hpa.xml --- a/sel_ann_hpa.xml Thu Dec 06 10:07:13 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,252 +0,0 @@ - - by tissue (normal or tumor tissue Human Protein Atlas) - - - - - - - - $__tool_directory__/sel_ann_hpa.R - --ref_file="$__tool_directory__/$options.proteinatlas" - --input_type="$input.ids" - #if $input.ids == "list" - --input="$input.list" - #else - --input="$input.file" - --column_number="$input.ncol" - --header="$input.header" - #end if - - #if $options.database == "normal" - --atlas="normal" - --tissue="$options.normal_tissue" - --level="$options.level" - --reliability="$options.reliability" - #else if $options.database == "tumor" - --atlas="cancer" - --cancer="$options.cancer_tissue" - #end if - --not_mapped="$not_mapped" - --output="$hpa_output" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r db2cd451835f -r 3bedd074c485 tool-data/proteinatlas.loc.sample --- a/tool-data/proteinatlas.loc.sample Thu Dec 06 10:07:13 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -#This file lists the locations and dbkeys of Human Protein Atlas files. -# -#full Full Human Protein Atlas (23/10/18) tool-data/HPA_full_atlas_23-10-2018.tsv -normal_tissue HPA Normal Tissue (23/10/18) tool-data/HPA_normal_tissue_23-10-2018.tsv -pathology HPA Tumor Tissue (23/10/18) tool-data/HPA_pathology_23-10-2018.tsv diff -r db2cd451835f -r 3bedd074c485 tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample Thu Dec 06 10:07:13 2018 -0500 +++ b/tool_data_table_conf.xml.sample Wed Mar 06 09:19:05 2019 -0500 @@ -1,7 +1,10 @@ - - - name, value, path - +
+ id, name, tissue, value + +
+ + id, name, tissue, value +
\ No newline at end of file