Mercurial > repos > proteore > proteore_expression_rnaseq_abbased
comparison add_expression_HPA.R @ 0:234d114cbe3a draft
planemo upload commit 2ee714c3e3d1cce461125dbc041904e03ea8bac1-dirty
| author | proteore |
|---|---|
| date | Mon, 12 Nov 2018 11:05:24 -0500 |
| parents | |
| children | 03487ed0f458 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:234d114cbe3a |
|---|---|
# Read a tab-separated file and return its content as a data.frame.
#
# Arguments:
#   path:   path of the file to read.
#   header: TRUE/FALSE, whether the first line holds the column names.
#
# Returns the data.frame produced by read.csv(); strings are kept as
# character vectors and column names are left untouched
# (check.names = FALSE).
#
# Stops with an error when the file cannot be opened or parsed. The message
# keeps the historical "File not found !" prefix and appends the path and the
# underlying cause, so a parsing problem is not mistaken for a missing file.
read_file <- function(path, header) {
  tryCatch(
    read.csv(path, header = header, sep = "\t", stringsAsFactors = FALSE,
             quote = "\"", check.names = FALSE),
    error = function(e) {
      stop("File not found ! (", path, ": ", conditionMessage(e), ")",
           call. = FALSE)
    }
  )
}
| 10 | |
# Convert a string to a boolean.
#
# The comparison is case-insensitive: "t"/"true" give TRUE, "f"/"false" give
# FALSE. The true spellings are checked first, and any other value yields
# NULL.
str2bool <- function(x) {
  lowered <- tolower(x)
  if (any(lowered %in% c("t", "true"))) {
    return(TRUE)
  }
  if (any(lowered %in% c("f", "false"))) {
    return(FALSE)
  }
  return(NULL)
}
| 21 | |
# Look up the requested atlas columns for a vector of Ensembl gene ids.
#
# Arguments:
#   input:   character vector of ids, matched against atlas$Ensembl.
#   atlas:   data.frame holding an "Ensembl" column plus annotation columns.
#   options: names of the atlas columns to return alongside "Ensembl".
#
# Returns NULL when none of the ids is present in the atlas. Otherwise
# returns a data.frame with one row per element of `input`, in input order,
# containing "Ensembl" followed by the requested columns. Ids missing from
# the atlas give a row of NA, and cells equal to "" or " " become NA.
#
# Stops when a requested column does not exist in the atlas.
add_expression <- function(input, atlas, options) {
  if (!any(input %in% atlas[["Ensembl"]])) {
    return(NULL)
  }

  wanted <- c("Ensembl", options)
  unknown <- setdiff(wanted, colnames(atlas))
  if (length(unknown) > 0) {
    stop("Column(s) not found in the atlas file: ",
         paste(unknown, collapse = ", "), call. = FALSE)
  }

  res <- atlas[match(input, atlas[["Ensembl"]]), wanted, drop = FALSE]

  # Blank out empty cells column by column. Going through apply() would
  # coerce the data.frame to a character matrix, and that coercion formats
  # numeric columns to a common width (e.g. " 1.5", "10.0").
  res[] <- lapply(res, function(column) {
    if (is.factor(column)) {
      column <- as.character(column)
    }
    if (is.character(column)) {
      column[column %in% c("", " ")] <- NA
    }
    column
  })
  res
}
| 31 | |
# Turn "--name=value" command-line tokens into a named list of values.
# Only the first "=" separates the name from the value, so a value may itself
# contain "=". A token without "=" gets an NA value.
parse_cli_args <- function(tokens) {
  stripped <- sub("^--", "", tokens)
  eq_pos <- regexpr("=", stripped, fixed = TRUE)
  has_value <- eq_pos > 0
  arg_names <- ifelse(has_value, substr(stripped, 1, eq_pos - 1), stripped)
  arg_values <- ifelse(has_value,
                       substr(stripped, eq_pos + 1, nchar(stripped)),
                       NA_character_)
  parsed <- as.list(arg_values)
  names(parsed) <- arg_names
  parsed
}

# Replace "" and " " cells with NA, one column at a time, so that numeric
# columns keep their type and are not reformatted.
blank_to_na <- function(df) {
  df[] <- lapply(df, function(column) {
    if (is.factor(column)) {
      column <- as.character(column)
    }
    if (is.character(column)) {
      column[column %in% c("", " ")] <- NA
    }
    column
  })
  df
}

# Command-line entry point.
#
# Reads the "--name=value" arguments, collects the Ensembl ids either from the
# --input string (inputtype "copypaste") or from one column of a tabular file
# (inputtype "tabfile"), looks them up in the atlas file with
# add_expression(), and writes a tab-separated result to --output.
# Prints the usage text and quits when called without arguments or with
# --help.
main <- function() {
  args <- commandArgs(TRUE)
  if (length(args) < 1) {
    args <- c("--help")
  }

  # Help section
  if ("--help" %in% args) {
    cat("Selection and Annotation HPA
Arguments:
--inputtype: type of input (list of id or filename)
--input: either a file name (e.g : input.txt) or a list of blank-separated
ENSG identifiers (e.g : ENSG00000283071 ENSG00000283072)
--atlas: path to protein atlas file
--column: the column number which you would like to apply...
--header: true/false if your file contains a header
--select: information from HPA to select, maybe:
RNA.tissue.category,Reliability..IH.,Reliability..IF. (comma-separated)
--output: text output filename \n")
    q(save = "no")
  }

  # Parse arguments
  args <- parse_cli_args(args)
  required <- c("inputtype", "input", "atlas", "select", "output")
  absent <- required[!required %in% names(args)]
  if (length(absent) > 0) {
    stop("Missing argument(s): ", paste0("--", absent, collapse = ", "),
         call. = FALSE)
  }

  inputtype <- args$inputtype
  if (inputtype == "copypaste") {
    input <- strsplit(args$input, "[ \t\n]+")[[1]]
  } else if (inputtype == "tabfile") {
    filename <- args$input
    # The column may be given as "c3" or "3".
    col_index <- suppressWarnings(as.numeric(gsub("c", "", args$column)))
    if (length(col_index) != 1 || is.na(col_index) ||
        col_index %% 1 != 0 || col_index < 1) {
      stop("Please enter an integer for the column number", call. = FALSE)
    }
    header <- str2bool(args$header)
    if (is.null(header)) {
      stop("Please enter true or false for --header", call. = FALSE)
    }
    file <- read_file(filename, header)
    if (col_index > ncol(file)) {
      stop("Column ", col_index, " does not exist in the input file",
           call. = FALSE)
    }
    # A cell may hold several ids separated by ";".
    input <- unlist(strsplit(as.character(file[[col_index]]), ";"))
  } else {
    stop("Unknown input type: ", inputtype, call. = FALSE)
  }

  # Read protein atlas
  protein_atlas <- read_file(args$atlas, TRUE)

  # Add expression
  output <- args$output
  options <- strsplit(args$select, ",")[[1]]
  res <- add_expression(input, protein_atlas, options)

  # Write output
  if (is.null(res)) {
    write.table("None of the input ENSG ids are can be found in HPA data file",
                file = output, sep = "\t", quote = FALSE, col.names = TRUE,
                row.names = FALSE)
  } else {
    if (inputtype == "copypaste") {
      output_content <- cbind(as.matrix(input), res)
      colnames(output_content)[1] <- "Ensembl"
    } else {
      # Keep one annotation row per id; repeated ids in the input would
      # otherwise multiply the matching rows of the file in the merge.
      annotations <- res[!duplicated(res[[1]]), , drop = FALSE]
      output_content <- merge(file, annotations, by.x = col_index, by.y = 1,
                              incomparables = NA, all.x = TRUE)
      # merge() moves the key column to the first position, so sort on it
      # there rather than at its position in the input file.
      row_order <- order(output_content[[1]], decreasing = TRUE)
      output_content <- output_content[row_order, , drop = FALSE]
    }
    output_content <- blank_to_na(output_content)
    write.table(output_content, output, row.names = FALSE, sep = "\t",
                quote = FALSE)
  }
}
| 105 | |
# Run the tool; main() reads the command-line arguments itself.
main()
