diff heatmap_viz.R @ 2:99207b432ebc draft

planemo upload commit c599cfc156c77626df2b674bdfbd437b9f664ab9
author proteore
date Thu, 13 Dec 2018 04:14:21 -0500
parents 4651551b48e4
children 07748b0136bb
line wrap: on
line diff
--- a/heatmap_viz.R	Wed Sep 12 09:37:26 2018 -0400
+++ b/heatmap_viz.R	Thu Dec 13 04:14:21 2018 -0500
@@ -1,7 +1,7 @@
 #!/usr/bin/Rscript
 
-suppressMessages(library('plotly'))
-suppressMessages(library('heatmaply'))
+suppressMessages(library('plotly',quietly = T))
+suppressMessages(library('heatmaply',quietly = T))
 
 #packageVersion('plotly')
 
@@ -27,6 +27,7 @@
       --row_names             Column which contains row names
       --header                True or False
       --col_text_angle        Angle of columns label ; from -90 to 90 degres
+      --dist_fun              function used to compute the distance
 
       Example:
       ./heatmap_viz.R --input='dat.nucl.norm.imputed.tsv' --output='heatmap.html' --cols='3:8' --row_names='2' --header=TRUE --col_text_angle=0 \n\n")
@@ -43,7 +44,7 @@
 }
 
 read_file <- function(path,header){
-  file <- try(read.table(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="",fill=TRUE),silent=TRUE)
+  file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="",fill=TRUE,check.names = F),silent=TRUE)
   if (inherits(file,"try-error")){
     stop("File not found !")
   }else{
@@ -63,48 +64,77 @@
 }
 
 #remove remaining quote 
+#only keep usefull columns
 #remove lines with at least one empty cell in a matrix between two defined columns
-clean_df <- function(mat,first_col,last_col,rownames){
-  tmp = mat[,first_col:last_col]
-  tmp <- as.data.frame(apply(tmp,c(1,2),function(x) {ifelse(is.character(x),as.numeric(x),x)}))
-  bad_lines <- which(apply(tmp, 1, function(x) any(is.na(x))))
-  mat <- cbind(mat[,as.numeric(rownames)],tmp)
-  if (length(bad_lines) > 0) {
-    mat <- mat[- bad_lines,]
-    print(paste("lines",bad_lines, "has been removed: at least one non numeric content"))
+clean_df <- function(mat,cols,rownames_col){
+  uto = mat[,cols]
+  uto <- as.data.frame(apply(uto,c(1,2),function(x) gsub(",",".",x)))
+  uto <- as.data.frame(apply(uto,c(1,2),function(x) {ifelse(is.character(x),as.numeric(x),x)}))
+  rownames(uto) <- mat[,rownames_col]
+  #bad_lines <- which(apply(uto, 1, function(x) any(is.na(x))))
+  #if (length(bad_lines) > 0) {
+  #  uto <- uto[- bad_lines,]
+  #  print(paste("lines",bad_lines, "has been removed: at least one non numeric content"))
+  #}
+  return(uto)
+}
+
+get_cols <-function(input_cols) {
+  input_cols <- gsub("c","",input_cols)
+  if (grepl(":",input_cols)) {
+    first_col=unlist(strsplit(input_cols,":"))[1]
+    last_col=unlist(strsplit(input_cols,":"))[2]
+    cols=first_col:last_col
+  } else {
+    cols = as.integer(unlist(strsplit(input_cols,",")))
   }
-  return(mat)
+  return(cols)
 }
 
 #get args
 args <- get_args()
 
+#save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/heatmap_viz/args.rda")
+#load("/home/dchristiany/proteore_project/ProteoRE/tools/heatmap_viz/args.rda")
+
 header=str2bool(args$header)
 output <- rapply(strsplit(args$output,"\\."),c) #remove extension
 output <- paste(output[1:length(output)-1],collapse=".")
 output <- paste(output,args$type,sep=".")
-first_col=as.numeric(substr(args$cols,1,1))
-last_col=as.numeric(substr(args$cols,3,3))
+cols = get_cols(args$cols)
+rownames_col = as.integer(gsub("c","",args$row_names))
+if (length(cols) <=1 ){
+  stop("You need several colums to build a heatmap")
+}
+dist=args$dist
+clust=args$clust
+dendrogram=args$dendrogram
 
 #cleaning data
-uto <- read_file(args$input,header = header)
-uto <- clean_df(uto,first_col,last_col,args$row_names)
-data <- as.data.frame(uto[,-1])
-row_names = uto[,1]
+uto <- read_file(args$input,header)
+uto <- clean_df(uto,cols,rownames_col)
+uto <- uto[rowSums(is.na(uto)) != ncol(uto), ]  #remove emptylines
+
 if (header) {
   col_names = names(data)
 } else {
-  col_names = c(first_col:last_col)
+  col_names = cols
 }
 
 #building heatmap
-heatmaply(data, file=output, margins=c(100,50,NA,0), plot_method="plotly", labRow = row_names, labCol = col_names,
-          grid_gap = 0,cexCol = 1, column_text_angle = as.numeric(args$col_text_angle), width = 1000, height=1000, colors = c('blue','green','yellow','red'))
-
+if (dist %in% c("pearson","spearman","kendall")){
+  heatmaply(uto, file=output, margins=c(100,50,NA,0), plot_method="plotly", labRow = rownames(uto), labCol = col_names, distfun=dist, 
+            hclust_method = clust, dendrogram = dendrogram, grid_gap = 0,cexCol = 1, column_text_angle = as.numeric(args$col_text_angle), 
+            width = 1000, height=1000, colors = c('blue','green','yellow','red'))
+} else {
+  heatmaply(uto, file=output, margins=c(100,50,NA,0), plot_method="plotly", labRow = rownames(uto), labCol = col_names, dist_method = dist, 
+          hclust_method = clust, dendrogram = dendrogram, grid_gap = 0,cexCol = 1, column_text_angle = as.numeric(args$col_text_angle), 
+          width = 1000, height=1000, colors = c('blue','green','yellow','red'))
+}
 
 ####heatmaply
 
-simulateExprData <- function(n, n0, p, rho0, rho1){ 
+simulateExprData <- function(n, n0, p, rho0, rho1){ row 
   # n: total number of subjects 
   # n0: number of subjects with exposure 0 
   # n1: number of subjects with exposure 1