Mercurial > repos > proteore > proteore_heatmap_visualization

--- a/heatmap.xml	Wed Sep 12 09:37:26 2018 -0400
+++ b/heatmap.xml	Thu Dec 13 04:14:21 2018 -0500
@@ -1,4 +1,5 @@
-<tool id="heatmap" name="heatmap visualization from uto table (heatmaply)" version="2018.09.12">
+<tool id="heatmap" name="HeatMap" version="2018.12.12">
+    <description></description>
     <requirements>
         <requirement type="package" version="4.7.1">r-plotly</requirement>
         <requirement type="package" version="0.14.1">r-heatmaply</requirement>
@@ -6,14 +7,62 @@
         <requirement type="package" version="2.2.1">pandoc</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
-        Rscript $__tool_directory__/heatmap_viz.R --input='$file' --output="$file.name"  --type='$output_type' --cols='$cols'
-            --row_names=$rownames --header='$header' --col_text_angle='$angle_col'
+        Rscript $__tool_directory__/heatmap_viz.R
+            --input='$file'
+            --output="$file.name"
+            --type='$output_type'
+            --cols='$select_data_columns.cols'
+            --row_names=$rownames
+            --header='$header'
+            --col_text_angle='$angle_col'
+            --dist="$distance"
+            --clust="$clustering"
+            --dendrogram="$dendrogram"
+
     ]]></command>
     <inputs>
         <param name="file" type="data" format="txt,tabular" label="Select a file (uto table)" help="" />
-        <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?" />
-        <param name="cols" type="text" value="" label="Enter columns to use from the first to the last separated by ':'" help='example : 3:8'/>
-        <param name="rownames" type="integer" value="1" label="Enter the column to use for row labels" help="for example : 1"/>
+        <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file have a header?" />
+        <conditional name="select_data_columns">
+            <param name="enter_cols" type="select" label="Select columns or a range of columns to be used for heatmap building">
+                <option value="cols_number">Select columns to be used one by one</option>
+                <option value="cols_range">Select a range of columns to be used</option>
+            </param>
+            <when value="cols_number">
+                <param name="cols" type="text" label="Enter data columns to use for the heatmap separated by commas" help="For example : c3,c5,c7"/>
+            </when>
+            <when value="cols_range">
+                <param name="cols" type="text" label="Enter a range of data columns to use for the heatmap, first and last column separated by ':'" help="For example : c2:c7"/>
+            </when>
+        </conditional>
+        <param name="rownames" type="text" value="c1" label="Enter the column to use for row labels" help="for example : c1"/>
+        <param name="distance" type="select" label="Distance measurement method" value="euclidean">
+            <option value="euclidean" selected="true">Euclidean</option>
+            <option value="pearson">Pearson</option>
+            <option value="spearman">Spearman</option>
+            <option value="kendall">Kendall</option>
+            <option value="maximum">Maximum</option>
+            <option value="manhattan">Manhattan</option>
+            <option value="canberra">Canberra</option>
+            <option value="binary">Binary</option>
+            <option value="minkowski">Minkowski</option>
+        </param>
+        <param name="clustering" type="select" label="Clustering method" value="average">
+            <option value="ward.D">Ward</option>
+            <option value="ward.D2">Ward2</option>
+            <option value="single">Single linkage (nearest neighbor)</option>
+            <option value="complete">Complete linkage (farthest neighbor)</option>
+            <option value="average" selected="true">Group average linkage (UPGMA)</option>
+            <option value="mcquitty">Simple average method (WPGMA)</option>
+            <!--option value="median">Median (WPGMC)</option>
+            <option value="centroid">Centroid (UPGMC)</option-->
+        </param>
+        <param name="dendrogram" type="select" label="Apply clustering on :" value="both">
+            <option value="row">Rows</option>
+            <option value="column">Columns</option>
+            <option value="both" selected="true">Rows and columns</option>
+            <option value="none">None</option>
+        </param>
         <param type="integer" name="angle_col" label="Angle of column labels" value="0" min="-90" max="90" />
         <param name="output_type" type="select" label="Choose the output format">
             <option value="html">html</option>
@@ -21,6 +70,7 @@
             <option value="jpeg">jpeg</option>
             <option value="png">png</option>
         </param>
+
     </inputs>
     <outputs>
         <data name="output" format="html">
@@ -46,18 +96,51 @@
         </test>
     </tests>
     <help><![CDATA[
-        Pathview R script
-        Arguments:
-        --help                  Print this test
-        --input                 path of the input  file (must contains a colum of uniprot and/or geneID accession number)
-        --output                Output name of file, could be .png, .jpeg, .pdf or .html
-        --cols                  Columns to use for heatmap, exemple : '3:8' to use columns from the third to the 8th
-        --row_names             Column which contains row names
-        --header                True or False
-        --col_text_angle        Angle of columns label ; from -90 to 90 degres
+
+This tool creates a heatmap from a tsv file (tab delimited).
+
+The input file must have one column of row labels and several columns of numeric data to be used for clustering.
+
+See table below for an example input file
+
+.. csv-table:: Example file
+   :header: "Uniprot","iBAQ_CTR1","iBAQ_CTR2","iBAQ_CTR3","iBAQ_pTCN1","iBAQ_pTCN2","iBAQ_pTCN3"
+
+
+   "Q49AN9",17.4091970440807,16.0474907255521,14.9687330755858,21.8454060245779,18.9468529040903,21.2330797498008
+   "O00148",14.1001686145694,14.806777888004,15.3555560564928,17.2942797505583,18.2106568817514,16.9479095182613
+   "F5H6E2",15.0235503328855,16.6142578028388,20.5969569088489,14.6615767253835,17.9752549753108,20.4023495267791
+   "E9PPW7",18.0770953690935,15.312218369812,13.8048301075204,17.5522130063356,15.9664520099065,15.1597932646987
+   "O00483",17.4188205774495,16.783665086968,15.1589556127476,19.7398973660168,20.8648965533665,20.1781898785682
+   "O00571",12.9049717044645,16.717296441372,13.8708732177805,19.8879681981565,21.0815521014477,17.4710040202845
+
+~
+
+You can choose the columns to be used to create the heatmap.

-        Example:
-        ./heatmap_viz.R --input='dat.nucl.norm.imputed.tsv' --output='heatmap.html' --cols='3:8' --row_names='2' --header=TRUE --col_text_angle=0
+You can either enter each column of interest manually or enter a range of columns to use.
+
+Then enter the column to use for the row labels (for example c1).
+
+If your header contains long strings, you may want to tilt the column labels to make them easier to read.
+
+The default output is html; it lets you zoom and see the row and column labels of a cell by hovering your cursor over it.
+
+You can select pdf, jpeg or png if you want a static output.
+
+-----
+
+.. class:: infomark
+
+**Authors**
+
+David Christiany, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+
+Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform
+
+This work has been partially funded through the French National Agency for Research (ANR) IFB project.
+
+Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.

     ]]></help>
     <citations>
--- a/heatmap_viz.R	Wed Sep 12 09:37:26 2018 -0400
+++ b/heatmap_viz.R	Thu Dec 13 04:14:21 2018 -0500
@@ -1,7 +1,7 @@
 #!/usr/bin/Rscript

-suppressMessages(library('plotly'))
-suppressMessages(library('heatmaply'))
+suppressMessages(library('plotly',quietly = T))
+suppressMessages(library('heatmaply',quietly = T))

 #packageVersion('plotly')

@@ -27,6 +27,7 @@
       --row_names             Column which contains row names
       --header                True or False
       --col_text_angle        Angle of columns label ; from -90 to 90 degres
+      --dist_fun              function used to compute the distance

       Example:
       ./heatmap_viz.R --input='dat.nucl.norm.imputed.tsv' --output='heatmap.html' --cols='3:8' --row_names='2' --header=TRUE --col_text_angle=0 \n\n")
@@ -43,7 +44,7 @@
 }

 read_file <- function(path,header){
-  file <- try(read.table(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="",fill=TRUE),silent=TRUE)
+  file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="",fill=TRUE,check.names = F),silent=TRUE)
   if (inherits(file,"try-error")){
     stop("File not found !")
   }else{
@@ -63,48 +64,77 @@
 }

 #remove remaining quote
+#only keep useful columns
 #remove lines with at least one empty cell in a matrix between two defined columns
-clean_df <- function(mat,first_col,last_col,rownames){
-  tmp = mat[,first_col:last_col]
-  tmp <- as.data.frame(apply(tmp,c(1,2),function(x) {ifelse(is.character(x),as.numeric(x),x)}))
-  bad_lines <- which(apply(tmp, 1, function(x) any(is.na(x))))
-  mat <- cbind(mat[,as.numeric(rownames)],tmp)
-  if (length(bad_lines) > 0) {
-    mat <- mat[- bad_lines,]
-    print(paste("lines",bad_lines, "has been removed: at least one non numeric content"))
+clean_df <- function(mat,cols,rownames_col){
+  # Keep the data columns `cols` of `mat`, convert them to numeric and use
+  # column `rownames_col` as row names; returns the resulting data frame.
+  # Columns are converted one by one: an element-wise apply() goes through
+  # as.matrix(), which format()s numeric columns when another one is text.
+  uto <- as.data.frame(mat[, cols, drop = FALSE], stringsAsFactors = FALSE)
+  # decimal commas become dots; non numeric content becomes NA (with a warning)
+  to_num <- function(x) as.numeric(if (is.character(x)) gsub(",", ".", x, fixed = TRUE) else x)
+  uto[] <- lapply(uto, to_num)
+  rownames(uto) <- mat[,rownames_col]
+  return(uto)
+}
+
+get_cols <-function(input_cols) {
+  # Parse "c3,c5,c7" or "c2:c7" ("c" prefix optional) into integer indices;
+  # stops on malformed input instead of silently returning NA indices.
+  spec <- gsub("c","",input_cols,ignore.case=TRUE)
+  is_range <- grepl(":",spec,fixed=TRUE)
+  cols <- suppressWarnings(as.integer(unlist(strsplit(spec,"[:,]"))))
+  if (anyNA(cols) || (is_range && length(cols) != 2)) {
+    stop("Invalid column specification: ",input_cols)
   }
-  return(mat)
+  return(if (is_range) cols[1]:cols[2] else cols)
 }

 #get args
 args <- get_args()

+#save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/heatmap_viz/args.rda")
+#load("/home/dchristiany/proteore_project/ProteoRE/tools/heatmap_viz/args.rda")
+
 header=str2bool(args$header)
 output <- rapply(strsplit(args$output,"\\."),c) #remove extension
 output <- paste(output[1:length(output)-1],collapse=".")
 output <- paste(output,args$type,sep=".")
-first_col=as.numeric(substr(args$cols,1,1))
-last_col=as.numeric(substr(args$cols,3,3))
+cols = get_cols(args$cols)
+rownames_col = as.integer(gsub("c","",args$row_names))
+if (length(cols) <=1 ){
+  stop("You need several colums to build a heatmap")
+}
+dist=args$dist
+clust=args$clust
+dendrogram=args$dendrogram

 #cleaning data
-uto <- read_file(args$input,header = header)
-uto <- clean_df(uto,first_col,last_col,args$row_names)
-data <- as.data.frame(uto[,-1])
-row_names = uto[,1]
+uto <- read_file(args$input,header)
+uto <- clean_df(uto,cols,rownames_col)
+uto <- uto[rowSums(is.na(uto)) != ncol(uto), ]  #remove emptylines
+
 if (header) {
-  col_names = names(data)
+  col_names = names(uto)  # 'data' is no longer defined; the cleaned table is 'uto'
 } else {
-  col_names = c(first_col:last_col)
+  col_names = cols  # no header: label the columns with their indices
 }

 #building heatmap
-heatmaply(data, file=output, margins=c(100,50,NA,0), plot_method="plotly", labRow = row_names, labCol = col_names,
-          grid_gap = 0,cexCol = 1, column_text_angle = as.numeric(args$col_text_angle), width = 1000, height=1000, colors = c('blue','green','yellow','red'))
-
+if (dist %in% c("pearson","spearman","kendall")){
+  heatmaply(uto, file=output, margins=c(100,50,NA,0), plot_method="plotly", labRow = rownames(uto), labCol = col_names, distfun=dist,
+            hclust_method = clust, dendrogram = dendrogram, grid_gap = 0,cexCol = 1, column_text_angle = as.numeric(args$col_text_angle),
+            width = 1000, height=1000, colors = c('blue','green','yellow','red'))
+} else {
+  heatmaply(uto, file=output, margins=c(100,50,NA,0), plot_method="plotly", labRow = rownames(uto), labCol = col_names, dist_method = dist,
+          hclust_method = clust, dendrogram = dendrogram, grid_gap = 0,cexCol = 1, column_text_angle = as.numeric(args$col_text_angle),
+          width = 1000, height=1000, colors = c('blue','green','yellow','red'))
+}

 ####heatmaply

-simulateExprData <- function(n, n0, p, rho0, rho1){
+simulateExprData <- function(n, n0, p, rho0, rho1){ row
   # n: total number of subjects
   # n0: number of subjects with exposure 0
   # n1: number of subjects with exposure 1