Mercurial > repos > proteore > proteore_heatmap_visualization
diff heatmap_viz.R @ 2:99207b432ebc draft
planemo upload commit c599cfc156c77626df2b674bdfbd437b9f664ab9
| author | proteore |
|---|---|
| date | Thu, 13 Dec 2018 04:14:21 -0500 |
| parents | 4651551b48e4 |
| children | 07748b0136bb |
line wrap: on
line diff
--- a/heatmap_viz.R Wed Sep 12 09:37:26 2018 -0400 +++ b/heatmap_viz.R Thu Dec 13 04:14:21 2018 -0500 @@ -1,7 +1,7 @@ #!/usr/bin/Rscript -suppressMessages(library('plotly')) -suppressMessages(library('heatmaply')) +suppressMessages(library('plotly',quietly = T)) +suppressMessages(library('heatmaply',quietly = T)) #packageVersion('plotly') @@ -27,6 +27,7 @@ --row_names Column which contains row names --header True or False --col_text_angle Angle of columns label ; from -90 to 90 degres + --dist_fun function used to compute the distance Example: ./heatmap_viz.R --input='dat.nucl.norm.imputed.tsv' --output='heatmap.html' --cols='3:8' --row_names='2' --header=TRUE --col_text_angle=0 \n\n") @@ -43,7 +44,7 @@ } read_file <- function(path,header){ - file <- try(read.table(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="",fill=TRUE),silent=TRUE) + file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="",fill=TRUE,check.names = F),silent=TRUE) if (inherits(file,"try-error")){ stop("File not found !") }else{ @@ -63,48 +64,77 @@ } #remove remaining quote +#only keep usefull columns #remove lines with at least one empty cell in a matrix between two defined columns -clean_df <- function(mat,first_col,last_col,rownames){ - tmp = mat[,first_col:last_col] - tmp <- as.data.frame(apply(tmp,c(1,2),function(x) {ifelse(is.character(x),as.numeric(x),x)})) - bad_lines <- which(apply(tmp, 1, function(x) any(is.na(x)))) - mat <- cbind(mat[,as.numeric(rownames)],tmp) - if (length(bad_lines) > 0) { - mat <- mat[- bad_lines,] - print(paste("lines",bad_lines, "has been removed: at least one non numeric content")) +clean_df <- function(mat,cols,rownames_col){ + uto = mat[,cols] + uto <- as.data.frame(apply(uto,c(1,2),function(x) gsub(",",".",x))) + uto <- as.data.frame(apply(uto,c(1,2),function(x) {ifelse(is.character(x),as.numeric(x),x)})) + rownames(uto) <- mat[,rownames_col] + #bad_lines <- which(apply(uto, 1, function(x) any(is.na(x)))) + #if (length(bad_lines) > 0) { + # uto <- uto[- bad_lines,] + # print(paste("lines",bad_lines, "has been removed: at least one non numeric content")) + #} + return(uto) +} + +get_cols <-function(input_cols) { + input_cols <- gsub("c","",input_cols) + if (grepl(":",input_cols)) { + first_col=unlist(strsplit(input_cols,":"))[1] + last_col=unlist(strsplit(input_cols,":"))[2] + cols=first_col:last_col + } else { + cols = as.integer(unlist(strsplit(input_cols,","))) } - return(mat) + return(cols) } #get args args <- get_args() +#save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/heatmap_viz/args.rda") +#load("/home/dchristiany/proteore_project/ProteoRE/tools/heatmap_viz/args.rda") + header=str2bool(args$header) output <- rapply(strsplit(args$output,"\\."),c) #remove extension output <- paste(output[1:length(output)-1],collapse=".") output <- paste(output,args$type,sep=".") -first_col=as.numeric(substr(args$cols,1,1)) -last_col=as.numeric(substr(args$cols,3,3)) +cols = get_cols(args$cols) +rownames_col = as.integer(gsub("c","",args$row_names)) +if (length(cols) <=1 ){ + stop("You need several colums to build a heatmap") +} +dist=args$dist +clust=args$clust +dendrogram=args$dendrogram #cleaning data -uto <- read_file(args$input,header = header) -uto <- clean_df(uto,first_col,last_col,args$row_names) -data <- as.data.frame(uto[,-1]) -row_names = uto[,1] +uto <- read_file(args$input,header) +uto <- clean_df(uto,cols,rownames_col) +uto <- uto[rowSums(is.na(uto)) != ncol(uto), ] #remove emptylines + if (header) { col_names = names(data) } else { - col_names = c(first_col:last_col) + col_names = cols } #building heatmap -heatmaply(data, file=output, margins=c(100,50,NA,0), plot_method="plotly", labRow = row_names, labCol = col_names, - grid_gap = 0,cexCol = 1, column_text_angle = as.numeric(args$col_text_angle), width = 1000, height=1000, colors = c('blue','green','yellow','red')) - +if (dist %in% c("pearson","spearman","kendall")){ + heatmaply(uto, file=output, margins=c(100,50,NA,0), plot_method="plotly", labRow = rownames(uto), labCol = col_names, distfun=dist, + hclust_method = clust, dendrogram = dendrogram, grid_gap = 0,cexCol = 1, column_text_angle = as.numeric(args$col_text_angle), + width = 1000, height=1000, colors = c('blue','green','yellow','red')) +} else { + heatmaply(uto, file=output, margins=c(100,50,NA,0), plot_method="plotly", labRow = rownames(uto), labCol = col_names, dist_method = dist, + hclust_method = clust, dendrogram = dendrogram, grid_gap = 0,cexCol = 1, column_text_angle = as.numeric(args$col_text_angle), + width = 1000, height=1000, colors = c('blue','green','yellow','red')) +} ####heatmaply -simulateExprData <- function(n, n0, p, rho0, rho1){ +simulateExprData <- function(n, n0, p, rho0, rho1){ row # n: total number of subjects # n0: number of subjects with exposure 0 # n1: number of subjects with exposure 1
