Mercurial > repos > proteore > proteore_expression_rnaseq_abbased
comparison add_expression_HPA.R @ 6:ea59f5750c51 draft
planemo upload commit fb27a6b5de5cd7b269a41be3c85c593b77aa1b18-dirty
| author | proteore |
|---|---|
| date | Thu, 27 Jun 2019 03:56:26 -0400 |
| parents | d7d2a4512059 |
| children | fda784211e3a |
comparison
equal
deleted
inserted
replaced
| 5:e919b55188ab | 6:ea59f5750c51 |
|---|---|
| 14 return (TRUE) | 14 return (TRUE) |
| 15 }else if (any(is.element(c("f","false"),tolower(x)))){ | 15 }else if (any(is.element(c("f","false"),tolower(x)))){ |
| 16 return (FALSE) | 16 return (FALSE) |
| 17 }else{ | 17 }else{ |
| 18 return(NULL) | 18 return(NULL) |
| | 19 } |
| | 20 } |
| | 21 |
| | 22 stopQuietly <- function(...) { |
| | 23 blankMsg <- sprintf("\r%s\r", paste(rep(" ", getOption("width")-1L), collapse=" ")); |
| | 24 stop(simpleError(blankMsg)); |
| | 25 } # stopQuietly() |
| | 26 |
| | 27 check_ensembl_geneids <- function(vector,type) { |
| | 28 ensembl_geneid_pattern = "^ENS[A-Z]+[0-9]{11}$|^[A-Z]{3}[0-9]{3}[A-Za-z](-[A-Za-z])?$|^CG[0-9]+$|^[A-Z0-9]+[.][0-9]+$|^YM[A-Z][0-9]{3}[a-z][0-9]$" |
| | 29 res = grepl(ensembl_geneid_pattern,vector) |
| | 30 if (all(!res)){ |
| | 31 cat("No Ensembl geneIDs found in entered ids") |
| | 32 stopQuietly() |
| | 33 } else if (any(!res)) { |
| | 34 cat(paste(sep="",collapse = " ",c(sum(!res, na.rm=TRUE),'IDs are not ENSG IDs, please check:\n'))) |
| | 35 not_geneids <- sapply(vector[which(!res)], function(x) paste(sep="",collapse = "",x,"\n"),USE.NAMES = F) |
| | 36 cat(not_geneids) |
| 19 } | 37 } |
| 20 } | 38 } |
| 21 | 39 |
| 22 add_expression = function(input, atlas, options) { | 40 add_expression = function(input, atlas, options) { |
| 23 input <- unique(input[!is.na(input)]) | 41 input <- unique(input[!is.na(input)]) |
| 87 colnames(res)=colnames(tab) | 105 colnames(res)=colnames(tab) |
| 88 } | 106 } |
| 89 return(res) | 107 return(res) |
| 90 } | 108 } |
| 91 | 109 |
| 92 main = function() { | 110 get_args <- function(){ |
| 93 args <- commandArgs(TRUE) | 111 args <- commandArgs(TRUE) |
| 94 if(length(args)<1) { | 112 if(length(args)<1) { |
| 95 args <- c("--help") | 113 args <- c("--help") |
| 96 } | 114 } |
| 97 | 115 |
| 114 # Parse arguments | 132 # Parse arguments |
| 115 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") | 133 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") |
| 116 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) | 134 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) |
| 117 args <- as.list(as.character(argsDF$V2)) | 135 args <- as.list(as.character(argsDF$V2)) |
| 118 names(args) <- argsDF$V1 | 136 names(args) <- argsDF$V1 |
| | 137 |
| | 138 return(args) |
| | 139 } |
| | 140 |
| | 141 is_col_in_file <- function(file,ncol) { |
| | 142 is_in_file = (ncol <= ncol(file) && ncol > 0) |
| | 143 if (!is_in_file){ |
| | 144 cat(paste(sep = "", collapse = " ", c("Column",ncol,"not found in file") )) |
| | 145 stopQuietly() |
| | 146 } |
| | 147 } |
| | 148 |
| | 149 main = function() { |
| | 150 |
| | 151 args = get_args() |
| 119 | 152 |
| 120 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/add_expression_data_HPA/args.rda") | 153 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/add_expression_data_HPA/args.rda") |
| 121 #load("/home/dchristiany/proteore_project/ProteoRE/tools/add_expression_data_HPA/args.rda") | 154 #load("/home/dchristiany/proteore_project/ProteoRE/tools/add_expression_data_HPA/args.rda") |
| 122 | 155 |
| 123 inputtype = args$inputtype | 156 inputtype = args$inputtype |
| 124 if (inputtype == "copypaste") { | 157 if (inputtype == "copypaste") { |
| 125 input = strsplit(args$input, "[ \t\n]+")[[1]] | 158 ids = strsplit(args$input, "[ \t\n]+")[[1]] |
| 126 } else if (inputtype == "tabfile") { | 159 } else if (inputtype == "tabfile") { |
| 127 filename = args$input | 160 filename = args$input |
| 128 ncol = args$column | 161 ncol = args$column |
| 129 # Check ncol | 162 # Check ncol |
| 130 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { | 163 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { |
| 132 } else { | 165 } else { |
| 133 ncol = as.numeric(gsub("c", "", ncol)) | 166 ncol = as.numeric(gsub("c", "", ncol)) |
| 134 } | 167 } |
| 135 header = str2bool(args$header) | 168 header = str2bool(args$header) |
| 136 file = read_file(filename, header) | 169 file = read_file(filename, header) |
| | 170 is_col_in_file(file,ncol) |
| 137 file = one_id_one_line(file,ncol) | 171 file = one_id_one_line(file,ncol) |
| 138 input = unlist(sapply(as.character(file[,ncol]),function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE)) | 172 ids = unlist(sapply(as.character(file[,ncol]),function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE)) |
| 139 input = input[which(!is.na(input))] | 173 ids = ids[which(!is.na(ids))] |
| 140 } | 174 } |
| | 175 check_ensembl_geneids(ids) |
| 141 | 176 |
| 142 # Read protein atlas | 177 # Read protein atlas |
| 143 protein_atlas = args$atlas | 178 protein_atlas = args$atlas |
| 144 protein_atlas = read_file(protein_atlas, T) | 179 protein_atlas = read_file(protein_atlas, T) |
| 145 | 180 |
| 146 # Add expression | 181 # Add expression |
| 147 output = args$output | 182 output = args$output |
| 148 options = strsplit(args$select, ",")[[1]] | 183 options = strsplit(args$select, ",")[[1]] |
| 149 res = add_expression(input, protein_atlas, options) | 184 res = add_expression(ids, protein_atlas, options) |
| 150 | 185 |
| 151 # Write output | 186 # Write output |
| 152 if (is.null(res)) { | 187 if (is.null(res)) { |
| 153 write.table("None of the input ENSG ids are can be found in HPA data file",file=output,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE) | 188 write.table("None of the ENSG ids entered can be found in HPA data file",file=output,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE) |
| 154 } else { | 189 } else { |
| 155 if (inputtype == "copypaste") { | 190 if (inputtype == "copypaste") { |
| 156 input <- data.frame(input) | 191 ids <- data.frame(ids) |
| 157 output_content = merge(input,res,by.x=1,by.y="row.names",incomparables = NA, all.x=T) | 192 output_content = merge(ids,res,by.x=1,by.y="row.names",incomparables = NA, all.x=T) |
| 158 colnames(output_content)[1] = "Ensembl" | 193 colnames(output_content)[1] = "Ensembl" |
| 159 } else if (inputtype == "tabfile") { | 194 } else if (inputtype == "tabfile") { |
| 160 output_content = merge(file, res, by.x=ncol, by.y="row.names", incomparables = NA, all.x=T) | 195 output_content = merge(file, res, by.x=ncol, by.y="row.names", incomparables = NA, all.x=T) |
| 161 output_content = order_columns(output_content,ncol) | 196 output_content = order_columns(output_content,ncol) |
| 162 } | 197 } |
