Mercurial > repos > proteore > proteore_get_expression_profiles
comparison get_expression_profiles.R @ 0:1c9562e9ea67 draft default tip
planemo upload commit c599cfc156c77626df2b674bdfbd437b9f664ab9
| author | proteore |
|---|---|
| date | Thu, 13 Dec 2018 04:06:48 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1c9562e9ea67 |
|---|---|
| 1 # Read file and return file content as data.frame | |
| 2 read_file <- function(path,header){ | |
| 3 file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE) | |
| 4 if (inherits(file,"try-error")){ | |
| 5 stop("File not found !") | |
| 6 }else{ | |
| 7 return(file) | |
| 8 } | |
| 9 } | |
| 10 | |
| 11 str2bool <- function(x){ | |
| 12 if (any(is.element(c("t","true"),tolower(x)))){ | |
| 13 return (TRUE) | |
| 14 }else if (any(is.element(c("f","false"),tolower(x)))){ | |
| 15 return (FALSE) | |
| 16 }else{ | |
| 17 return(NULL) | |
| 18 } | |
| 19 } | |
| 20 | |
| 21 # input has to be a list of IDs in ENSG format | |
| 22 # tissue is one of unique(HPA.normal.tissue$Tissue) | |
| 23 # level is one, or several, or 0 (=ALL) of "Not detected", "Medium", "High", "Low" | |
| 24 # reliability is one, or several, or 0 (=ALL) of "Approved", "Supported", "Uncertain" | |
| 25 annot.HPAnorm<-function(input, HPA_normal_tissue, tissue, level, reliability, not_mapped_option) { | |
| 26 dat <- subset(HPA_normal_tissue, Gene %in% input) | |
| 27 res.Tissue<-subset(dat, Tissue %in% tissue) | |
| 28 res.Level<-subset(res.Tissue, Level %in% level) | |
| 29 res.Rel<-subset(res.Level, Reliability %in% reliability) | |
| 30 | |
| 31 if (not_mapped_option) { | |
| 32 if (length(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)))>0) { | |
| 33 not_match_IDs <- matrix(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)), ncol = 1, nrow = length(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)))) | |
| 34 not.match <- matrix("not match", ncol = ncol(HPA_normal_tissue) - 1, nrow = length(not_match_IDs)) | |
| 35 not.match <- cbind(not_match_IDs, unname(not.match)) | |
| 36 colnames(not.match) <- colnames(HPA_normal_tissue) | |
| 37 res <- rbind(res.Rel, not.match) | |
| 38 } else { | |
| 39 res <- res.Rel | |
| 40 } | |
| 41 | |
| 42 if (length(setdiff(input, unique(dat$Gene)))>0) { | |
| 43 not.mapped <- matrix(ncol = ncol(HPA_normal_tissue) - 1, nrow = length(setdiff(input, unique(dat$Gene)))) | |
| 44 not.mapped <- cbind(matrix(setdiff(input, unique(dat$Gene)), ncol = 1, nrow = length(setdiff(input, unique(dat$Gene)))), unname(not.mapped)) | |
| 45 colnames(not.mapped) <- colnames(HPA_normal_tissue) | |
| 46 res <- rbind(res, not.mapped) | |
| 47 } | |
| 48 | |
| 49 } else { | |
| 50 res <- res.Rel | |
| 51 } | |
| 52 | |
| 53 return(res) | |
| 54 | |
| 55 } | |
| 56 | |
| 57 annot.HPAcancer<-function(input, HPA_cancer_tissue, cancer, not_mapped_option) { | |
| 58 dat <- subset(HPA_cancer_tissue, Gene %in% input) | |
| 59 res.Cancer<-subset(dat, Cancer %in% cancer) | |
| 60 | |
| 61 if (not_mapped_option) { | |
| 62 not.mapped <- matrix(ncol=ncol(HPA_cancer_tissue)-1, nrow=length(setdiff(input, unique(dat$Gene)))) | |
| 63 not.mapped <- cbind(matrix(setdiff(input, unique(dat$Gene)), ncol = 1, nrow = length(setdiff(input, unique(dat$Gene)))), unname(not.mapped)) | |
| 64 colnames(not.mapped) <- colnames(HPA_cancer_tissue) | |
| 65 res <- rbind(res.Cancer, not.mapped) | |
| 66 } else { | |
| 67 res <- res.Cancer | |
| 68 } | |
| 69 return(res) | |
| 70 } | |
| 71 | |
| 72 | |
| 73 main <- function() { | |
| 74 args <- commandArgs(TRUE) | |
| 75 if(length(args)<1) { | |
| 76 args <- c("--help") | |
| 77 } | |
| 78 | |
| 79 # Help section | |
| 80 if("--help" %in% args) { | |
| 81 cat("Selection and Annotation HPA | |
| 82 Arguments: | |
| 83 --ref_file: HPA normal/cancer tissue file path | |
| 84 --input_type: type of input (list of id or filename) | |
| 85 --input: list of IDs in ENSG format | |
| 86 --column_number: the column number which you would like to apply... | |
| 87 --header: true/false if your file contains a header | |
| 88 --atlas: normal/cancer | |
| 89 if normal: | |
| 90 --tissue: list of tissues | |
| 91 --level: Not detected, Low, Medium, High | |
| 92 --reliability: Supportive, Uncertain | |
| 93 if cancer: | |
| 94 --cancer: Cancer tissues | |
| 95 --not_mapped: true/false if your output file should contain not-mapped and not-match IDs | |
| 96 --output: output filename \n") | |
| 97 q(save="no") | |
| 98 } | |
| 99 | |
| 100 # Parse arguments | |
| 101 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") | |
| 102 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) | |
| 103 args <- as.list(as.character(argsDF$V2)) | |
| 104 names(args) <- argsDF$V1 | |
| 105 | |
| 106 #save(args,file = "/home/dchristiany/proteore_project/ProteoRE/tools/select_annotate_tissue/args.rda") | |
| 107 #load("/home/dchristiany/proteore_project/ProteoRE/tools/select_annotate_tissue/args.rda") | |
| 108 | |
| 109 # Extract input | |
| 110 input_type = args$input_type | |
| 111 if (input_type == "list") { | |
| 112 list_id = strsplit(args$input, "[ \t\n]+")[[1]] | |
| 113 } else if (input_type == "file") { | |
| 114 filename = args$input | |
| 115 column_number = as.numeric(gsub("c", "" ,args$column_number)) | |
| 116 header = str2bool(args$header) | |
| 117 file = read_file(filename, header) | |
| 118 list_id = sapply(strsplit(file[,column_number], ";"), "[", 1) | |
| 119 } | |
| 120 input = list_id | |
| 121 | |
| 122 # Read reference file | |
| 123 reference_file = read_file(args$ref_file, TRUE) | |
| 124 | |
| 125 # Extract other options | |
| 126 atlas = args$atlas | |
| 127 not_mapped_option = str2bool(args$not_mapped) | |
| 128 if (atlas=="normal") { | |
| 129 tissue = strsplit(args$tissue, ",")[[1]] | |
| 130 level = strsplit(args$level, ",")[[1]] | |
| 131 reliability = strsplit(args$reliability, ",")[[1]] | |
| 132 # Calculation | |
| 133 res = annot.HPAnorm(input, reference_file, tissue, level, reliability, not_mapped_option) | |
| 134 } else if (atlas=="cancer") { | |
| 135 cancer = strsplit(args$cancer, ",")[[1]] | |
| 136 # Calculation | |
| 137 res = annot.HPAcancer(input, reference_file, cancer, not_mapped_option) | |
| 138 } | |
| 139 | |
| 140 # Write output | |
| 141 output = args$output | |
| 142 res <- apply(res, c(1,2), function(x) gsub("^$|^ $", NA, x)) | |
| 143 write.table(res, output, sep = "\t", quote = FALSE, row.names = FALSE) | |
| 144 } | |
| 145 | |
| 146 main() |
