Mercurial > repos > greg > kaks_analysis_barplot
changeset 0:844acb833219 draft
Uploaded
author | greg |
---|---|
date | Wed, 08 Mar 2017 08:55:19 -0500 |
parents | |
children | 30430f2758fe |
files | kaks_analysis_barplot.R kaks_analysis_barplot.xml |
diffstat | 2 files changed, 145 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kaks_analysis_barplot.R Wed Mar 08 08:55:19 2017 -0500 @@ -0,0 +1,111 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) + +option_list <- list( + make_option(c("-c", "--components"), action="store", dest="components", help="Ks significant components input dataset"), + make_option(c("-o", "--output"), action="store", dest="output", default=NULL, help="Output dataset"), +) + +parser <- OptionParser(usage="%prog [options] file", option_list=option_list) +args <- parse_args(parser, positional_arguments=TRUE) +opt <- args$options + + +get_num_components=function(components_dataset) +{ + # Read in the components data. + components_data <- read.delim(components_dataset, header=TRUE); + # Get the max of the number_comp column. + num_components <- max(components_data[3, ], na.rm=TRUE); + return = c(components_data, num_components) + return +} + +get_pi_mu_var = function(components_data, num_components) { + # FixMe: enhance this to generically handle any integer value for num_components. + if (num_components == 1) { + pi <- c(components_data[1, 9]); + mu <- c(components_data[1, 7]); + var <- c(components_data[1, 8]); + } else if (num_components == 2) { + pi <- c(components_data[2, 9], components_data[3, 9]); + mu <- c(components_data[2, 7], components_data[3, 7]); + var <- c(components_data[2, 8], components_data[3, 8]); + } else if (num_components == 3) { + pi <- c(components_data[4, 9], components_data[5, 9], components_data[6, 9]); + mu <- c(components_data[4, 7], components_data[5, 7], components_data[6, 7]); + var <- c(components_data[4, 8], components_data[5, 8], components_data[6, 8]); + } else if (num_components == 4) { + pi <- c(components_data[7, 9], components_data[8, 9], components_data[9, 9], components_data[10, 9]); + mu <- c(components_data[7, 7], components_data[8, 7], components_data[9, 7], components_data[10, 7]); + var <- c(components_data[7, 8], components_data[8, 8], components_data[9, 8], components_data[10, 8]); + return = c(pi, mu, var) + return +} + +plot_ks<-function(ksfile, pi, mu, var) { + #change bin width + bin <- 0.05 * seq(0, 40); + kaks <- read.table(file=ksfile, header=T); + kaks <- kaks[kaks$Ks<2,]; + h.kst <- hist(kaks$Ks, breaks=bin, plot=F); + nc <- h.kst$counts; + vx <- h.kst$mids; + ntot <- sum(nc); + # Set margin for plot bottom, left top, right. + par(mai=c(0.5, 0.5, 0, 0)); + # Plot dimension in inches. + par(pin=c(2.5, 2.5)); + g <- calculate_fitted_density(pi, mu, var); + h <- ntot * 2.5 / sum(g); + vx <- seq(1, 100) * 0.02; + ymax <- max(nc) + 5; + barplot(nc, space=0.25, offset=0, width=0.04, xlim=c(0,2), ylim=c(0, ymax)); + # Add x-axis. + axis(1); + color <- c('green', 'blue', 'black', 'red'); + for (i in 1:length(mu)) { + lines(vx, g[,i] * h, lwd=2, col=color[i]); + } +}; + +calculate_fitted_density <- function(pi, mu, var) { + comp <- length(pi); + var <- var/mu^2; + mu <- log(mu);; + #calculate lognormal density + vx <- seq(1, 100) * 0.02; + fx <- matrix(0, 100, comp); + for (i in 1:100) { + for (j in 1:comp) { + fx[i, j] <- pi[j] * dlnorm(vx[i], meanlog=mu[j], sdlog=(sqrt(var[j]))); + }; + }; + fx; +} + +# Read in the components data and get the number of components. +items <- get_num_components(opt$components) +components_data <- items[1] +num_components <- items[2] + +# Set output file name. +if (is.null(opt$output)) { + # Name the output file based on the name of the + # input file, properly handling full path if passed. + input_filename = basename(opt$components) + items = strsplit(input_filename, ".") + output_filename <- paste(items[1], ".components.", num_components, ".pdf") +} else { + output_filename <- opt$output +} + +# Set pi, mu, var. +items <- get_pi_mu_var(components_data, num_components) +pi <- items[1] +mu <- items[2] +var <- items[3] + +# Plot the output. +plot_ks(output_filename, pi, mu, var)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kaks_analysis_barplot.xml Wed Mar 08 08:55:19 2017 -0500 @@ -0,0 +1,34 @@ +<tool id="kaks_barplot" name="KaKs Barplot" version="1.0.0"> + <description></description> + <requirements> + <requirement type="package" version="1.3.0">r-optparse</requirement> + </requirements> + <command> + <![CDATA[ + Rscript $__tool_directory__/kaks_barplot.R + -s '${components}' + -o "$output" + ]]> + </command> + <inputs> + <param name="components" format="tabular" type="data" label="Select dataset containing significant components in the ks distribution" /> + </inputs> + <outputs> + <data name="output" format="pdf"/> + </outputs> + <help> +**What it does** + +Draws a barplot of the significant components in the ks distribution. + </help> + <citations> + <citation type="bibtex"> + @unpublished{None, + author = {Eric Wafula}, + title = {None}, + year = {None}, + url = {https://github.com/dePamphilis/PlantTribes} + } + </citation> + </citations> +</tool>