changeset 3:07748b0136bb draft default tip

planemo upload commit fb27a6b5de5cd7b269a41be3c85c593b77aa1b18-dirty
author proteore
date Thu, 27 Jun 2019 04:18:07 -0400
parents 99207b432ebc
children
files heatmap.xml heatmap_viz.R
diffstat 2 files changed, 68 insertions(+), 56 deletions(-) [+]
line wrap: on
line diff
--- a/heatmap.xml	Thu Dec 13 04:14:21 2018 -0500
+++ b/heatmap.xml	Thu Jun 27 04:18:07 2019 -0400
@@ -1,5 +1,5 @@
-<tool id="heatmap" name="HeatMap" version="2018.12.12">
-    <description></description>
+<tool id="heatmap" name="HeatMap" version="2019.06.27">
+    <description>(Cluster and visualize the results)</description>
     <requirements>
         <requirement type="package" version="4.7.1">r-plotly</requirement>
         <requirement type="package" version="0.14.1">r-heatmaply</requirement>
@@ -12,7 +12,7 @@
             --output="$file.name"  
             --type='$output_type' 
             --cols='$select_data_columns.cols' 
-            --row_names=$rownames 
+            --row_names="$rownames"
             --header='$header' 
             --col_text_angle='$angle_col'
             --dist="$distance"
@@ -21,22 +21,39 @@
 
     ]]></command>
     <inputs>
-        <param name="file" type="data" format="txt,tabular" label="Select a file (uto table)" help="" />
-        <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file have a header?" />
+        <param name="file" type="data" format="txt,tabular" label="Select your file (table)" help="" />
+        <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your file contain a header?" />
         <conditional name="select_data_columns">
-            <param name="enter_cols" type="select" label="Select columns or a range of columns to be used for heatmap building">
+            <param name="enter_cols" type="select" label="Select columns or a range of columns containing expression values">
                 <option value="cols_number">Select columns to be used one by one</option>
                 <option value="cols_range">Select a range of columns to be used</option>
             </param>
             <when value="cols_number">
-                <param name="cols" type="text" label="Enter data columns to use for the heatmap separated by commas" help="For example : c3,c5,c7"/>
+                <param name="cols" type="text" label="Enter column number (separated by a comma)" help="For example : c3,c5,c7">
+                    <validator type="regex" message="Please enter column numbers, for example: 'c1,c3' for first and third columns">^([c]{0,1}[0-9]+[,]*)+$</validator>
+                </param>
             </when>
             <when value="cols_range">
-                <param name="cols" type="text" label="Enter a range of data columns to use for the heatmap, first and last column separated bay ':'" help="For example : c2:c7"/>
+                <param name="cols" type="text" label="Enter a range of column number, first and last column separated by ':'" help="For example : c2:c7">
+                    <validator type="regex" message="Please enter first and last column numbers separated by ':', for example: 'c2:c7' for all colums from the second to the 7th">^[c]{0,1}[0-9]+:[c]{0,1}[0-9]+$</validator>
+                </param>
             </when>
         </conditional>
-        <param name="rownames" type="text" value="c1" label="Enter the column to use for row labels" help="for example : c1"/>
-        <param name="distance" type="select" label="Distance measurement method" value="euclidean">
+        <param name="rownames" type="text" value="c1" label="Enter column number containing row labels" help="for example : c1 if labels are in column n°1">
+            <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">^[c]{0,1}[0-9]+$</validator><!-- anchored like the other column validators so trailing characters (e.g. 'c1abc') are rejected -->
+        </param>
+        <param type="integer" name="angle_col" label="Angle of column labels" value="0" min="-90" max="90" />
+        <param name="clustering" type="select" label="Clustering method" value="average">
+            <option value="ward.D">Ward</option>
+            <option value="ward.D2">Ward2</option>
+            <option value="single">Single linkage (nearest neighbor)</option>
+            <option value="complete">Complete linkage (farthest neighbor)</option>
+            <option value="average" selected="true">Group average linkage (UPGMA)</option>
+            <option value="mcquitty">Simple average method (WPGMA)</option>
+            <!--option value="median">Median (WPGMC)</option>
+            <option value="centroid">Centroid (UPGMC)</option-->
+        </param>
+         <param name="distance" type="select" label="Distance measurement method" value="euclidean">
             <option value="euclidean" selected="true">Euclidean</option>
             <option value="pearson" selected="true">Pearson</option>
             <option value="spearman">Spearman</option>
@@ -47,28 +64,18 @@
             <option value="binary">Binary</option>
             <option value="minkowski">Minkowski</option>
         </param>
-        <param name="clustering" type="select" label="Clustering method" value="average">
-            <option value="ward.D">Ward</option>
-            <option value="ward.D2">Ward2</option>
-            <option value="single">Single linkage (nearest neighbor)</option>
-            <option value="complete">Complete linkage (farthest neighbor</option>
-            <option value="average" selected="true">Group average linkage (UPGMA)</option>
-            <option value="mcquitty">Simple average method (WPGMA)</option>
-            <!--option value="median">Median (WPGMC)</option>
-            <option value="centroid">Centroid (UPGMC)</option-->
-        </param>
         <param name="dendrogram" type="select" label="Apply clustering on :" value="both">
             <option value="row">Rows</option>
             <option value="column">Columns</option>
             <option value="both" selected="true">Rows and columns</option>
             <option value="none">None</option>
         </param>
-        <param type="integer" name="angle_col" label="Angle of column labels" value="0" min="-90" max="90" />
-        <param name="output_type" type="select" label="Choose the output format">
+        
+        <param name="output_type" type="select" label="Select output format">
             <option value="html">html</option>
             <option value="pdf">pdf</option>
-            <option value="jpeg">jpeg</option>
-            <option value="png">png</option>
+            <!--option value="jpeg">jpeg</option-->
+            <!--option value="png">png</option-->
         </param>
 
     </inputs>
@@ -96,14 +103,19 @@
         </test>
     </tests>
     <help><![CDATA[
+**Description**
 
-This tool creates a heatmap from a tsv file (tab delimited).
+This tool allows users to generate, cluster and visualize expression-based heat maps from transcriptomic, proteomic and metabolomic experiments.
+It is based on heatmaply, an R package for easily creating interactive cluster heatmaps (see reference below).
 
-Input file must have a column for rows labels and colums with numeric data to be used for clustering.
+-----
 
+**Input**
+
+A tab-delimited file having a column with labels (e.g. a gene name, Uniprot accession number...) and columns with numerical values (intensities) for clustering.
 See table below for an example input file
 
-.. csv-table:: Example file
+.. csv-table:: example of input file
    :header: "Uniprot","iBAQ_CTR1","iBAQ_CTR2","iBAQ_CTR3","iBAQ_pTCN1","iBAQ_pTCN2","iBAQ_pTCN3"
 
 
@@ -114,49 +126,48 @@
    "O00483",17.4188205774495,16.783665086968,15.1589556127476,19.7398973660168,20.8648965533665,20.1781898785682
    "O00571",12.9049717044645,16.717296441372,13.8708732177805,19.8879681981565,21.0815521014477,17.4710040202845
 
-~
+-----
+
+**Parameters**
 
-You can choose the columns to be used to create the heatmap.
+"Select columns or a range of columns containing expression values": choose the columns to use to perform clustering and to create the heatmap.
+You can enter specific column numbers (e.g. c2,c5 will create a heatmap for columns 2 and 5, corresponding to conditions from the example file above) or a range of columns to use (e.g. c2:c7 will consider all replicates of each condition in the example above).
 
-You can add manually each colums of interest or enter a range of columns to use.
+"Enter column number containing row labels": enter the number of the column containing the row labels (e.g. "c1" in the example above).
+
+"Angle of column labels": if the label names are long, you may want to tilt the column labels to keep them readable.
 
-You then entered the column number of the column you want to be used for the rows labels .
+"Clustering method": methods for computing hierarchical clustering (six available) 
 
-If you have long string in your header, you might want to incline the column labels for better reading.
+"Distance measurement method": function used to compute the distance (dissimilarity) between both rows and columns (nine available). The options "pearson", "spearman" and "kendall" can
+be used for correlation-based clustering.
 
-Default output is html, it allows you to zoom and have row an column labels of a cell by passing your cursor on it.
+-----
 
-You can select pdf, jpeg or png if you want a static output.
+**Output**
+
+The default output is html, which lets you browse the heatmap interactively (note: for large files, display and interactivity may be degraded); pdf format is available for static output.
 
 -----
 
 .. class:: infomark
 
 **Authors**
+Galili T, O'Callaghan A, Sidi J, Sievert C. heatmaply: an R package for creating interactive cluster heatmaps for online publishing. Bioinformatics. 2018. 34(9):1600-1602. doi: 10.1093/bioinformatics/btx657. PubMed PMID: 29069305
+
+-----
+
+.. class:: infomark
+
+**Galaxy integration**
 
 David Christiany, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
 
-Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform
+Sandra Dérozier, Olivier Rué, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR
 
 This work has been partially funded through the French National Agency for Research (ANR) IFB project.
 
-Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
+Help: contact@proteore.org for any questions or concerns about this tool.
 
     ]]></help>
-    <citations>
-        <citation type="bibtex">
-@misc{renameTODO,
-  author = {LastTODO, FirstTODO},
-  year = {TODO},
-  title = {TODO},
-  url = {https://plot.ly/r/},
-}</citation>
-        <citation type="bibtex">
-@misc{renameTODO,
-  author = {LastTODO, FirstTODO},
-  year = {TODO},
-  title = {TODO},
-  url = {https://cran.r-project.org/package=heatmaply},
-}</citation>
-    </citations>
-</tool>
\ No newline at end of file
+</tool>
--- a/heatmap_viz.R	Thu Dec 13 04:14:21 2018 -0500
+++ b/heatmap_viz.R	Thu Jun 27 04:18:07 2019 -0400
@@ -1,4 +1,5 @@
 #!/usr/bin/Rscript
+options(warn=-1)  #TURN OFF WARNINGS !!!!!!
 
 suppressMessages(library('plotly',quietly = T))
 suppressMessages(library('heatmaply',quietly = T))
@@ -80,7 +81,7 @@
 }
 
 get_cols <-function(input_cols) {
-  input_cols <- gsub("c","",input_cols)
+  input_cols <- gsub("c","",gsub("C","",gsub(" ","",input_cols)))
   if (grepl(":",input_cols)) {
     first_col=unlist(strsplit(input_cols,":"))[1]
     last_col=unlist(strsplit(input_cols,":"))[2]
@@ -102,7 +103,7 @@
 output <- paste(output[1:length(output)-1],collapse=".")
 output <- paste(output,args$type,sep=".")
 cols = get_cols(args$cols)
-rownames_col = as.integer(gsub("c","",args$row_names))
+rownames_col = as.integer(gsub("c","",gsub("C","",gsub(" ","",args$row_names))))
 if (length(cols) <=1 ){
   stop("You need several colums to build a heatmap")
 }
@@ -116,7 +117,7 @@
 uto <- uto[rowSums(is.na(uto)) != ncol(uto), ]  #remove emptylines
 
 if (header) {
-  col_names = names(data)
+  col_names = names(uto)
 } else {
   col_names = cols
 }