comparison get_expression_profiles.R @ 0:1c9562e9ea67 draft default tip

planemo upload commit c599cfc156c77626df2b674bdfbd437b9f664ab9
author proteore
date Thu, 13 Dec 2018 04:06:48 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1c9562e9ea67
1 # Read file and return file content as data.frame
2 read_file <- function(path,header){
3 file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE)
4 if (inherits(file,"try-error")){
5 stop("File not found !")
6 }else{
7 return(file)
8 }
9 }
10
11 str2bool <- function(x){
12 if (any(is.element(c("t","true"),tolower(x)))){
13 return (TRUE)
14 }else if (any(is.element(c("f","false"),tolower(x)))){
15 return (FALSE)
16 }else{
17 return(NULL)
18 }
19 }
20
21 # input has to be a list of IDs in ENSG format
22 # tissue is one of unique(HPA.normal.tissue$Tissue)
23 # level is one, or several, or 0 (=ALL) of "Not detected", "Medium", "High", "Low"
24 # reliability is one, or several, or 0 (=ALL) of "Approved", "Supported", "Uncertain"
25 annot.HPAnorm<-function(input, HPA_normal_tissue, tissue, level, reliability, not_mapped_option) {
26 dat <- subset(HPA_normal_tissue, Gene %in% input)
27 res.Tissue<-subset(dat, Tissue %in% tissue)
28 res.Level<-subset(res.Tissue, Level %in% level)
29 res.Rel<-subset(res.Level, Reliability %in% reliability)
30
31 if (not_mapped_option) {
32 if (length(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)))>0) {
33 not_match_IDs <- matrix(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene)), ncol = 1, nrow = length(setdiff(intersect(input, unique(dat$Gene)), unique(res.Rel$Gene))))
34 not.match <- matrix("not match", ncol = ncol(HPA_normal_tissue) - 1, nrow = length(not_match_IDs))
35 not.match <- cbind(not_match_IDs, unname(not.match))
36 colnames(not.match) <- colnames(HPA_normal_tissue)
37 res <- rbind(res.Rel, not.match)
38 } else {
39 res <- res.Rel
40 }
41
42 if (length(setdiff(input, unique(dat$Gene)))>0) {
43 not.mapped <- matrix(ncol = ncol(HPA_normal_tissue) - 1, nrow = length(setdiff(input, unique(dat$Gene))))
44 not.mapped <- cbind(matrix(setdiff(input, unique(dat$Gene)), ncol = 1, nrow = length(setdiff(input, unique(dat$Gene)))), unname(not.mapped))
45 colnames(not.mapped) <- colnames(HPA_normal_tissue)
46 res <- rbind(res, not.mapped)
47 }
48
49 } else {
50 res <- res.Rel
51 }
52
53 return(res)
54
55 }
56
57 annot.HPAcancer<-function(input, HPA_cancer_tissue, cancer, not_mapped_option) {
58 dat <- subset(HPA_cancer_tissue, Gene %in% input)
59 res.Cancer<-subset(dat, Cancer %in% cancer)
60
61 if (not_mapped_option) {
62 not.mapped <- matrix(ncol=ncol(HPA_cancer_tissue)-1, nrow=length(setdiff(input, unique(dat$Gene))))
63 not.mapped <- cbind(matrix(setdiff(input, unique(dat$Gene)), ncol = 1, nrow = length(setdiff(input, unique(dat$Gene)))), unname(not.mapped))
64 colnames(not.mapped) <- colnames(HPA_cancer_tissue)
65 res <- rbind(res.Cancer, not.mapped)
66 } else {
67 res <- res.Cancer
68 }
69 return(res)
70 }
71
72
73 main <- function() {
74 args <- commandArgs(TRUE)
75 if(length(args)<1) {
76 args <- c("--help")
77 }
78
79 # Help section
80 if("--help" %in% args) {
81 cat("Selection and Annotation HPA
82 Arguments:
83 --ref_file: HPA normal/cancer tissue file path
84 --input_type: type of input (list of id or filename)
85 --input: list of IDs in ENSG format
86 --column_number: the column number which you would like to apply...
87 --header: true/false if your file contains a header
88 --atlas: normal/cancer
89 if normal:
90 --tissue: list of tissues
91 --level: Not detected, Low, Medium, High
92 --reliability: Supportive, Uncertain
93 if cancer:
94 --cancer: Cancer tissues
95 --not_mapped: true/false if your output file should contain not-mapped and not-match IDs
96 --output: output filename \n")
97 q(save="no")
98 }
99
100 # Parse arguments
101 parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
102 argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
103 args <- as.list(as.character(argsDF$V2))
104 names(args) <- argsDF$V1
105
106 #save(args,file = "/home/dchristiany/proteore_project/ProteoRE/tools/select_annotate_tissue/args.rda")
107 #load("/home/dchristiany/proteore_project/ProteoRE/tools/select_annotate_tissue/args.rda")
108
109 # Extract input
110 input_type = args$input_type
111 if (input_type == "list") {
112 list_id = strsplit(args$input, "[ \t\n]+")[[1]]
113 } else if (input_type == "file") {
114 filename = args$input
115 column_number = as.numeric(gsub("c", "" ,args$column_number))
116 header = str2bool(args$header)
117 file = read_file(filename, header)
118 list_id = sapply(strsplit(file[,column_number], ";"), "[", 1)
119 }
120 input = list_id
121
122 # Read reference file
123 reference_file = read_file(args$ref_file, TRUE)
124
125 # Extract other options
126 atlas = args$atlas
127 not_mapped_option = str2bool(args$not_mapped)
128 if (atlas=="normal") {
129 tissue = strsplit(args$tissue, ",")[[1]]
130 level = strsplit(args$level, ",")[[1]]
131 reliability = strsplit(args$reliability, ",")[[1]]
132 # Calculation
133 res = annot.HPAnorm(input, reference_file, tissue, level, reliability, not_mapped_option)
134 } else if (atlas=="cancer") {
135 cancer = strsplit(args$cancer, ",")[[1]]
136 # Calculation
137 res = annot.HPAcancer(input, reference_file, cancer, not_mapped_option)
138 }
139
140 # Write output
141 output = args$output
142 res <- apply(res, c(1,2), function(x) gsub("^$|^ $", NA, x))
143 write.table(res, output, sep = "\t", quote = FALSE, row.names = FALSE)
144 }
145
146 main()