Mercurial > repos > proteore > proteore_expression_rnaseq_abbased
diff add_expression_HPA.R @ 0:234d114cbe3a draft
planemo upload commit 2ee714c3e3d1cce461125dbc041904e03ea8bac1-dirty
| author | proteore |
|---|---|
| date | Mon, 12 Nov 2018 11:05:24 -0500 |
| parents | |
| children | 03487ed0f458 |
line wrap: on
line diff
# add_expression_HPA.R
# Annotate Ensembl gene identifiers (ENSG) with columns selected from a
# Human Protein Atlas (HPA) tab-separated file. Run with --help for usage.

# Read a tab-separated file and return its content as a data.frame.
#
# path:   path of the file to read.
# header: TRUE if the first line holds the column names, FALSE otherwise.
#
# Column names are kept verbatim (check.names = FALSE) and strings are not
# converted to factors. Stops with an error starting with "File not found !"
# when the file cannot be read; the underlying reason is appended so that
# problems other than a missing file are not hidden.
read_file <- function(path, header) {
  tryCatch(
    read.csv(path, header = header, sep = "\t", stringsAsFactors = FALSE,
             quote = "\"", check.names = FALSE),
    error = function(e) {
      stop("File not found ! (", conditionMessage(e), ")", call. = FALSE)
    }
  )
}

# Convert a string to a boolean.
#
# Returns TRUE for "t"/"true", FALSE for "f"/"false" (case-insensitive) and
# NULL for anything else, so callers must check the result with is.null().
str2bool <- function(x) {
  value <- tolower(x)
  if (any(value %in% c("t", "true"))) {
    return(TRUE)
  }
  if (any(value %in% c("f", "false"))) {
    return(FALSE)
  }
  NULL
}

# Replace empty ("") and single-blank (" ") strings by NA in every character
# (or factor) column of a data.frame; other columns are returned untouched.
#
# This is done column by column rather than with apply(): apply() first
# coerces the data.frame with as.matrix(), which format()s numeric columns
# into padded strings.
blank_to_na <- function(df) {
  for (j in seq_along(df)) {
    column <- df[[j]]
    if (is.factor(column)) {
      column <- as.character(column)
    }
    if (is.character(column)) {
      column[column %in% c("", " ")] <- NA
      df[[j]] <- column
    }
  }
  df
}

# Turn "--key=value" command-line tokens into a named list.
#
# Only the first "=" separates key from value, so values that themselves
# contain "=" are kept whole. Tokens without "=" are ignored.
parse_args <- function(args) {
  tokens <- sub("^--", "", args)
  tokens <- tokens[grepl("=", tokens, fixed = TRUE)]
  parsed <- as.list(sub("^[^=]*=", "", tokens))
  names(parsed) <- sub("=.*$", "", tokens)
  parsed
}

# Look up HPA information for a vector of Ensembl identifiers.
#
# input:   character vector of ENSG identifiers.
# atlas:   data.frame of the HPA file; must contain an "Ensembl" column.
# options: names of the atlas columns to return.
#
# Returns NULL when none of the identifiers is present in atlas$Ensembl.
# Otherwise returns a data.frame with one row per element of `input` (in the
# same order) holding the "Ensembl" column followed by `options`; rows of
# identifiers missing from the atlas are all NA, and blank cells are NA.
add_expression <- function(input, atlas, options) {
  if (!any(input %in% atlas$Ensembl)) {
    return(NULL)
  }
  res <- atlas[match(input, atlas$Ensembl), c("Ensembl", options),
               drop = FALSE]
  blank_to_na(res)
}

# Command-line entry point: parse the arguments, read the input identifiers
# and the atlas, and write the annotated table to --output.
main <- function() {
  args <- commandArgs(TRUE)
  if (length(args) < 1) {
    args <- c("--help")
  }

  # Help section
  if ("--help" %in% args) {
    cat("Selection and Annotation HPA
    Arguments:
        --inputtype: type of input (list of id or filename)
        --input: either a file name (e.g : input.txt) or a list of blank-separated
                 ENSG identifiers (e.g : ENSG00000283071 ENSG00000283072)
        --atlas: path to protein atlas file
        --column: the column number which you would like to apply...
        --header: true/false if your file contains a header
        --select: information from HPA to select, maybe:
                  RNA.tissue.category,Reliability..IH.,Reliability..IF.
                  (comma-separated)
        --output: text output filename \n")
    q(save = "no")
  }

  # Parse arguments
  args <- parse_args(args)
  required <- c("inputtype", "input", "atlas", "select", "output")
  absent <- setdiff(required, names(args))
  if (length(absent) > 0) {
    stop("Missing argument(s): ", paste0("--", absent, collapse = ", "),
         call. = FALSE)
  }

  inputtype <- args$inputtype
  if (inputtype == "copypaste") {
    input <- strsplit(args$input, "[ \t\n]+")[[1]]
  } else if (inputtype == "tabfile") {
    # The column may be given as "c3" or "3"; it must be a positive integer.
    id_col <- suppressWarnings(as.numeric(gsub("c", "", args$column)))
    if (length(id_col) != 1 || is.na(id_col) ||
        id_col %% 1 != 0 || id_col < 1) {
      stop("Please enter an integer for column", call. = FALSE)
    }
    header <- str2bool(args$header)
    if (is.null(header)) {
      stop("--header must be true or false", call. = FALSE)
    }
    file <- read_file(args$input, header)
    if (id_col > length(file)) {
      stop("Column ", id_col, " does not exist in the input file",
           call. = FALSE)
    }
    # A cell may hold several identifiers separated by ";".
    input <- unlist(strsplit(as.character(file[[id_col]]), ";"),
                    use.names = FALSE)
  } else {
    stop("Unknown inputtype: ", inputtype, call. = FALSE)
  }

  # Read protein atlas
  protein_atlas <- read_file(args$atlas, TRUE)

  # Add expression
  output <- args$output
  options <- strsplit(args$select, ",")[[1]]
  res <- add_expression(input, protein_atlas, options)

  # Write output
  if (is.null(res)) {
    write.table(
      "None of the input ENSG ids are can be found in HPA data file",
      file = output, sep = "\t", quote = FALSE, col.names = TRUE,
      row.names = FALSE
    )
  } else {
    if (inputtype == "copypaste") {
      # First column: identifiers as typed; then the atlas columns.
      output_content <- data.frame(Ensembl = input, res, check.names = FALSE,
                                   stringsAsFactors = FALSE)
    } else {
      # One annotation row per identifier: repeated identifiers would
      # otherwise multiply the rows of the merge, and NA keys never match.
      key <- res[["Ensembl"]]
      res <- res[!is.na(key) & !duplicated(key), , drop = FALSE]
      output_content <- merge(file, res, by.x = id_col, by.y = 1,
                              incomparables = NA, all.x = TRUE)
      # merge() puts the key column first, so sort on column 1 (not on the
      # position the identifier column had in the input file).
      output_content <- output_content[
        order(output_content[[1]], decreasing = TRUE), , drop = FALSE
      ]
    }
    output_content <- blank_to_na(output_content)
    write.table(output_content, output, row.names = FALSE, sep = "\t",
                quote = FALSE)
  }
}
# Run the tool when the script is executed.
main()
