Mercurial > repos > proteore > proteore_heatmap_visualization
changeset 2:99207b432ebc draft
planemo upload commit c599cfc156c77626df2b674bdfbd437b9f664ab9
| author | proteore |
|---|---|
| date | Thu, 13 Dec 2018 04:14:21 -0500 |
| parents | 4651551b48e4 |
| children | 07748b0136bb |
| files | heatmap.xml heatmap_viz.R |
| diffstat | 2 files changed, 153 insertions(+), 40 deletions(-) [+] |
line wrap: on
line diff
--- a/heatmap.xml Wed Sep 12 09:37:26 2018 -0400 +++ b/heatmap.xml Thu Dec 13 04:14:21 2018 -0500 @@ -1,4 +1,5 @@ -<tool id="heatmap" name="heatmap visualization from uto table (heatmaply)" version="2018.09.12"> +<tool id="heatmap" name="HeatMap" version="2018.12.12"> + <description></description> <requirements> <requirement type="package" version="4.7.1">r-plotly</requirement> <requirement type="package" version="0.14.1">r-heatmaply</requirement> @@ -6,14 +7,62 @@ <requirement type="package" version="2.2.1">pandoc</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ - Rscript $__tool_directory__/heatmap_viz.R --input='$file' --output="$file.name" --type='$output_type' --cols='$cols' - --row_names=$rownames --header='$header' --col_text_angle='$angle_col' + Rscript $__tool_directory__/heatmap_viz.R + --input='$file' + --output="$file.name" + --type='$output_type' + --cols='$select_data_columns.cols' + --row_names=$rownames + --header='$header' + --col_text_angle='$angle_col' + --dist="$distance" + --clust="$clustering" + --dendrogram="$dendrogram" + ]]></command> <inputs> <param name="file" type="data" format="txt,tabular" label="Select a file (uto table)" help="" /> - <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?" /> - <param name="cols" type="text" value="" label="Enter columns to use from the first to the last separated by ':'" help='example : 3:8'/> - <param name="rownames" type="integer" value="1" label="Enter the column to use for row labels" help="for example : 1"/> + <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file have a header?" 
/> + <conditional name="select_data_columns"> + <param name="enter_cols" type="select" label="Select columns or a range of columns to be used for heatmap building"> + <option value="cols_number">Select columns to be used one by one</option> + <option value="cols_range">Select a range of columns to be used</option> + </param> + <when value="cols_number"> + <param name="cols" type="text" label="Enter data columns to use for the heatmap separated by commas" help="For example : c3,c5,c7"/> + </when> + <when value="cols_range"> + <param name="cols" type="text" label="Enter a range of data columns to use for the heatmap, first and last column separated bay ':'" help="For example : c2:c7"/> + </when> + </conditional> + <param name="rownames" type="text" value="c1" label="Enter the column to use for row labels" help="for example : c1"/> + <param name="distance" type="select" label="Distance measurement method" value="euclidean"> + <option value="euclidean" selected="true">Euclidean</option> + <option value="pearson" selected="true">Pearson</option> + <option value="spearman">Spearman</option> + <option value="kendall">Kendall</option> + <option value="maximum">Maximum</option> + <option value="manhattan">Manhattan</option> + <option value="canberra">Canberra</option> + <option value="binary">Binary</option> + <option value="minkowski">Minkowski</option> + </param> + <param name="clustering" type="select" label="Clustering method" value="average"> + <option value="ward.D">Ward</option> + <option value="ward.D2">Ward2</option> + <option value="single">Single linkage (nearest neighbor)</option> + <option value="complete">Complete linkage (farthest neighbor</option> + <option value="average" selected="true">Group average linkage (UPGMA)</option> + <option value="mcquitty">Simple average method (WPGMA)</option> + <!--option value="median">Median (WPGMC)</option> + <option value="centroid">Centroid (UPGMC)</option--> + </param> + <param name="dendrogram" type="select" 
label="Apply clustering on :" value="both"> + <option value="row">Rows</option> + <option value="column">Columns</option> + <option value="both" selected="true">Rows and columns</option> + <option value="none">None</option> + </param> <param type="integer" name="angle_col" label="Angle of column labels" value="0" min="-90" max="90" /> <param name="output_type" type="select" label="Choose the output format"> <option value="html">html</option> @@ -21,6 +70,7 @@ <option value="jpeg">jpeg</option> <option value="png">png</option> </param> + </inputs> <outputs> <data name="output" format="html"> @@ -46,18 +96,51 @@ </test> </tests> <help><![CDATA[ - Pathview R script - Arguments: - --help Print this test - --input path of the input file (must contains a colum of uniprot and/or geneID accession number) - --output Output name of file, could be .png, .jpeg, .pdf or .html - --cols Columns to use for heatmap, exemple : '3:8' to use columns from the third to the 8th - --row_names Column which contains row names - --header True or False - --col_text_angle Angle of columns label ; from -90 to 90 degres + +This tool creates a heatmap from a tsv file (tab delimited). + +Input file must have a column for rows labels and colums with numeric data to be used for clustering. + +See table below for an example input file + +.. 
csv-table:: Example file + :header: "Uniprot","iBAQ_CTR1","iBAQ_CTR2","iBAQ_CTR3","iBAQ_pTCN1","iBAQ_pTCN2","iBAQ_pTCN3" + + + "Q49AN9",17.4091970440807,16.0474907255521,14.9687330755858,21.8454060245779,18.9468529040903,21.2330797498008 + "O00148",14.1001686145694,14.806777888004,15.3555560564928,17.2942797505583,18.2106568817514,16.9479095182613 + "F5H6E2",15.0235503328855,16.6142578028388,20.5969569088489,14.6615767253835,17.9752549753108,20.4023495267791 + "E9PPW7",18.0770953690935,15.312218369812,13.8048301075204,17.5522130063356,15.9664520099065,15.1597932646987 + "O00483",17.4188205774495,16.783665086968,15.1589556127476,19.7398973660168,20.8648965533665,20.1781898785682 + "O00571",12.9049717044645,16.717296441372,13.8708732177805,19.8879681981565,21.0815521014477,17.4710040202845 + +~ + +You can choose the columns to be used to create the heatmap. - Example: - ./heatmap_viz.R --input='dat.nucl.norm.imputed.tsv' --output='heatmap.html' --cols='3:8' --row_names='2' --header=TRUE --col_text_angle=0 +You can manually add each column of interest or enter a range of columns to use. + +You then enter the number of the column you want to be used for the row labels. + +If you have long strings in your header, you might want to incline the column labels for better reading. + +The default output is html; it allows you to zoom and to see the row and column labels of a cell by hovering your cursor over it. + +You can select pdf, jpeg or png if you want a static output. + +----- + +.. class:: infomark + +**Authors** + +David Christiany, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR + +Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform + +This work has been partially funded through the French National Agency for Research (ANR) IFB project. 
+ +Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool. ]]></help> <citations>
--- a/heatmap_viz.R Wed Sep 12 09:37:26 2018 -0400 +++ b/heatmap_viz.R Thu Dec 13 04:14:21 2018 -0500 @@ -1,7 +1,7 @@ #!/usr/bin/Rscript -suppressMessages(library('plotly')) -suppressMessages(library('heatmaply')) +suppressMessages(library('plotly',quietly = T)) +suppressMessages(library('heatmaply',quietly = T)) #packageVersion('plotly') @@ -27,6 +27,7 @@ --row_names Column which contains row names --header True or False --col_text_angle Angle of columns label ; from -90 to 90 degres + --dist_fun function used to compute the distance Example: ./heatmap_viz.R --input='dat.nucl.norm.imputed.tsv' --output='heatmap.html' --cols='3:8' --row_names='2' --header=TRUE --col_text_angle=0 \n\n") @@ -43,7 +44,7 @@ } read_file <- function(path,header){ - file <- try(read.table(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="",fill=TRUE),silent=TRUE) + file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="",fill=TRUE,check.names = F),silent=TRUE) if (inherits(file,"try-error")){ stop("File not found !") }else{ @@ -63,48 +64,77 @@ } #remove remaining quote +#only keep usefull columns #remove lines with at least one empty cell in a matrix between two defined columns -clean_df <- function(mat,first_col,last_col,rownames){ - tmp = mat[,first_col:last_col] - tmp <- as.data.frame(apply(tmp,c(1,2),function(x) {ifelse(is.character(x),as.numeric(x),x)})) - bad_lines <- which(apply(tmp, 1, function(x) any(is.na(x)))) - mat <- cbind(mat[,as.numeric(rownames)],tmp) - if (length(bad_lines) > 0) { - mat <- mat[- bad_lines,] - print(paste("lines",bad_lines, "has been removed: at least one non numeric content")) +clean_df <- function(mat,cols,rownames_col){ + uto = mat[,cols] + uto <- as.data.frame(apply(uto,c(1,2),function(x) gsub(",",".",x))) + uto <- as.data.frame(apply(uto,c(1,2),function(x) {ifelse(is.character(x),as.numeric(x),x)})) + rownames(uto) <- mat[,rownames_col] + #bad_lines <- which(apply(uto, 1, function(x) any(is.na(x)))) + #if 
(length(bad_lines) > 0) { + # uto <- uto[- bad_lines,] + # print(paste("lines",bad_lines, "has been removed: at least one non numeric content")) + #} + return(uto) +} + +get_cols <-function(input_cols) { + input_cols <- gsub("c","",input_cols) + if (grepl(":",input_cols)) { + first_col=unlist(strsplit(input_cols,":"))[1] + last_col=unlist(strsplit(input_cols,":"))[2] + cols=first_col:last_col + } else { + cols = as.integer(unlist(strsplit(input_cols,","))) } - return(mat) + return(cols) } #get args args <- get_args() +#save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/heatmap_viz/args.rda") +#load("/home/dchristiany/proteore_project/ProteoRE/tools/heatmap_viz/args.rda") + header=str2bool(args$header) output <- rapply(strsplit(args$output,"\\."),c) #remove extension output <- paste(output[1:length(output)-1],collapse=".") output <- paste(output,args$type,sep=".") -first_col=as.numeric(substr(args$cols,1,1)) -last_col=as.numeric(substr(args$cols,3,3)) +cols = get_cols(args$cols) +rownames_col = as.integer(gsub("c","",args$row_names)) +if (length(cols) <=1 ){ + stop("You need several colums to build a heatmap") +} +dist=args$dist +clust=args$clust +dendrogram=args$dendrogram #cleaning data -uto <- read_file(args$input,header = header) -uto <- clean_df(uto,first_col,last_col,args$row_names) -data <- as.data.frame(uto[,-1]) -row_names = uto[,1] +uto <- read_file(args$input,header) +uto <- clean_df(uto,cols,rownames_col) +uto <- uto[rowSums(is.na(uto)) != ncol(uto), ] #remove emptylines + if (header) { col_names = names(data) } else { - col_names = c(first_col:last_col) + col_names = cols } #building heatmap -heatmaply(data, file=output, margins=c(100,50,NA,0), plot_method="plotly", labRow = row_names, labCol = col_names, - grid_gap = 0,cexCol = 1, column_text_angle = as.numeric(args$col_text_angle), width = 1000, height=1000, colors = c('blue','green','yellow','red')) - +if (dist %in% c("pearson","spearman","kendall")){ + heatmaply(uto, file=output, 
margins=c(100,50,NA,0), plot_method="plotly", labRow = rownames(uto), labCol = col_names, distfun=dist, + hclust_method = clust, dendrogram = dendrogram, grid_gap = 0,cexCol = 1, column_text_angle = as.numeric(args$col_text_angle), + width = 1000, height=1000, colors = c('blue','green','yellow','red')) +} else { + heatmaply(uto, file=output, margins=c(100,50,NA,0), plot_method="plotly", labRow = rownames(uto), labCol = col_names, dist_method = dist, + hclust_method = clust, dendrogram = dendrogram, grid_gap = 0,cexCol = 1, column_text_angle = as.numeric(args$col_text_angle), + width = 1000, height=1000, colors = c('blue','green','yellow','red')) +} ####heatmaply -simulateExprData <- function(n, n0, p, rho0, rho1){ +simulateExprData <- function(n, n0, p, rho0, rho1){ row # n: total number of subjects # n0: number of subjects with exposure 0 # n1: number of subjects with exposure 1
