Mercurial > repos > greg > kaks_analysis_distribution
changeset 0:6e0f0227283b draft
Uploaded
author | greg |
---|---|
date | Fri, 17 Mar 2017 07:47:15 -0400 |
parents | |
children | d6f8c3d739b9 |
files | .shed.yml kaks_analysis_distribution.R kaks_analysis_distribution.xml test-data/components.tabular test-data/kaks.tabular test-data/output.pdf |
diffstat | 6 files changed, 345 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Fri Mar 17 07:47:15 2017 -0400 @@ -0,0 +1,11 @@ +name: plant_tribes_kaks_analysis_barplot +owner: iuc +description: | + Contains a tool that draws a barplot of the significant components in the ks distribution. +homepage_url: https://github.com/dePamphilis/PlantTribes +long_description: | + Contains a tool that draws a barplot of the significant components in the ks distribution. +remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/plant_tribes/visualization +type: unrestricted +categories: +- Phylogenetics
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kaks_analysis_distribution.R Fri Mar 17 07:47:15 2017 -0400 @@ -0,0 +1,115 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) + +option_list <- list( + make_option(c("-c", "--components_input"), action="store", dest="components_input", help="Ks significant components input dataset"), + make_option(c("-k", "--kaks_input"), action="store", dest="kaks_input", help="KaKs analysis input dataset"), + make_option(c("-o", "--output"), action="store", dest="output", help="Output dataset") +) + +parser <- OptionParser(usage="%prog [options] file", option_list=option_list) +args <- parse_args(parser, positional_arguments=TRUE) +opt <- args$options + + +get_num_components = function(components_data) +{ + # Get the max of the number_comp column. + number_comp = components_data[, 3] + num_components <- max(number_comp, na.rm=TRUE); + num_components +} + +get_pi_mu_var = function(components_data, num_components) +{ + # FixMe: enhance this to generically handle any integer value for num_components. + if (num_components == 1) + { + pi <- c(components_data[1, 9]); + mu <- c(components_data[1, 7]); + var <- c(components_data[1, 8]); + } + else if (num_components == 2) + { + pi <- c(components_data[2, 9], components_data[3, 9]); + mu <- c(components_data[2, 7], components_data[3, 7]); + var <- c(components_data[2, 8], components_data[3, 8]); + } + else if (num_components == 3) + { + pi <- c(components_data[4, 9], components_data[5, 9], components_data[6, 9]); + mu <- c(components_data[4, 7], components_data[5, 7], components_data[6, 7]); + var <- c(components_data[4, 8], components_data[5, 8], components_data[6, 8]); + } + else if (num_components == 4) + { + pi <- c(components_data[7, 9], components_data[8, 9], components_data[9, 9], components_data[10, 9]); + mu <- c(components_data[7, 7], components_data[8, 7], components_data[9, 7], components_data[10, 7]); + var <- c(components_data[7, 8], components_data[8, 8], components_data[9, 8], components_data[10, 8]); + } + return = c(pi, mu, var) + return +} + +plot_ks<-function(kaks_input, output, pi, mu, var) +{ + # Start PDF device driver to save charts to output. + pdf(file=output, bg="white") + # Change bin width + bin <- 0.05 * seq(0, 40); + kaks <- read.table(file=kaks_input, header=T); + kaks <- kaks[kaks$Ks<2,]; + h.kst <- hist(kaks$Ks, breaks=bin, plot=F); + nc <- h.kst$counts; + vx <- h.kst$mids; + ntot <- sum(nc); + # Set margin for plot bottom, left top, right. + par(mai=c(0.5, 0.5, 0, 0)); + # Plot dimension in inches. + par(pin=c(2.5, 2.5)); + g <- calculate_fitted_density(pi, mu, var); + h <- ntot * 2.5 / sum(g); + vx <- seq(1, 100) * 0.02; + ymax <- max(nc) + 5; + barplot(nc, space=0.25, offset=0, width=0.04, xlim=c(0,2), ylim=c(0, ymax)); + # Add x-axis. + axis(1); + color <- c('green', 'blue', 'black', 'red'); + for (i in 1:length(mu)) + { + lines(vx, g[,i] * h, lwd=2, col=color[i]); + } +}; + +calculate_fitted_density <- function(pi, mu, var) +{ + comp <- length(pi); + var <- var/mu^2; + mu <- log(mu); + #calculate lognormal density + vx <- seq(1, 100) * 0.02; + fx <- matrix(0, 100, comp); + for (i in 1:100) + { + for (j in 1:comp) + { + fx[i, j] <- pi[j] * dlnorm(vx[i], meanlog=mu[j], sdlog=(sqrt(var[j]))); + }; + }; + fx; +} + +# Read in the components data. +components_data <- read.delim(opt$components_input, header=TRUE); +# Get the number of components. +num_components <- get_num_components(components_data) + +# Set pi, mu, var. +items <- get_pi_mu_var(components_data, num_components); +pi <- items[1]; +mu <- items[2]; +var <- items[3]; + +# Plot the output. +plot_ks(opt$kaks_input, opt$output, pi, mu, var);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kaks_analysis_distribution.xml Fri Mar 17 07:47:15 2017 -0400 @@ -0,0 +1,44 @@ +<tool id="kaks_analysis_distribution" name="KaKsAnalysis distribution" version="1.0.0"> + <description>plots synonymous (ks) distribution and fit significant component</description> + <requirements> + <requirement type="package" version="1.3.0">r-optparse</requirement> + </requirements> + <command> + <![CDATA[ + Rscript $__tool_directory__/kaks_analysis_distribution.R + -k '${kaks}' + -c '${components}' + -o '$output' + ]]> + </command> + <inputs> + <param name="kaks" format="tabular" type="data" label="KaKs analysis file" /> + <param name="components" format="tabular" type="data" label="Significant components in the ks distribution" /> + </inputs> + <outputs> + <data name="output" format="pdf"/> + </outputs> + <tests> + <test> + <param name="kaks" value="kaks.tabular" ftype="tabular" /> + <param name="components" value="components.tabular" ftype="tabular" /> + <output name="output" file="output.pdf" ftype="pdf" compare="contains" /> + </test> + </tests> + <help> +**What it does** + +Draws a barplot of the significant components in the ks distribution. + </help> + <citations> + <citation type="bibtex"> + @article{None, + journal = {None}, + author = {1. Wafula EK}, + title = {Manuscript in preparation}, + year = {None}, + url = {https://github.com/dePamphilis/PlantTribes},} + </citation> + </citations> +</tool> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/components.tabular Fri Mar 17 07:47:15 2017 -0400 @@ -0,0 +1,7 @@ +species n number_comp lnL AIC BIC mean variance porportion +species1 1160 1 -1404.9900 2813.98 2824.09 0.4426 0.1293 1.00 +species1 1160 2 -1353.6550 2717.31 2742.59 0.7376 0.1672 0.61 + 0.2000 0.0069 0.39 +species1 1160 3 -1323.7480 2663.50 2703.94 0.1214 0.0002 0.10 + 0.7759 0.1663 0.57 + 0.2428 0.0070 0.33
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/kaks.tabular Fri Mar 17 07:47:15 2017 -0400 @@ -0,0 +1,101 @@ +SEQ1 SEQ2 Ka Ks Ka\Ks +species1_10079 species1_10080 0.1194 0.9782 0.1221 +species1_10818 species1_10817 0.0194 0.9541 0.0203 +species1_4505 species1_4506 0.0379 0.2411 0.1571 +species1_2890 species1_2889 0.0212 0.8520 0.0249 +species1_3386 species1_3385 0.0002 0.1698 0.0010 +species1_2266 species1_2265 0.0881 0.1515 0.5814 +species1_7052 species1_7051 0.0514 0.8172 0.0630 +species1_3374 species1_3373 0.0034 0.4496 0.0075 +species1_688 species1_687 0.1420 0.2970 0.4782 +species1_10669 species1_10670 0.0003 0.3216 0.0010 +species1_9810 species1_9809 0.1490 0.7782 0.1914 +species1_11440 species1_11439 0.0083 0.2177 0.0380 +species1_2713 species1_2714 0.1227 0.2133 0.5755 +species1_4698 species1_4697 0.1021 0.2207 0.4624 +species1_10939 species1_10940 0.0861 0.7844 0.1098 +species1_22 species1_21 0.1368 0.2515 0.5437 +species1_7149 species1_7150 0.0817 0.9899 0.0825 +species1_5139 species1_5140 0.1167 0.2669 0.4373 +species1_3430 species1_3429 0.0803 0.9931 0.0808 +species1_10208 species1_10207 0.0764 0.1098 0.6957 +species1_8410 species1_8409 0.0165 0.8114 0.0204 +species1_2452 species1_2451 0.0629 0.4523 0.1391 +species1_289 species1_290 0.0560 0.1590 0.3526 +species1_10028 species1_10027 0.0927 1.3078 0.0708 +species1_8242 species1_8241 0.0037 0.1390 0.0264 +species1_5661 species1_5662 0.2040 0.5982 0.3410 +species1_85 species1_86 0.0391 0.1012 0.3866 +species1_1660 species1_1659 0.0408 0.3674 0.1111 +species1_616 species1_615 0.0078 0.1315 0.0595 +species1_8043 species1_8044 0.0637 0.3891 0.1637 +species1_1465 species1_1466 0.1186 0.1725 0.6876 +species1_7761 species1_7762 0.2544 0.3513 0.7242 +species1_299 species1_300 0.1407 0.5325 0.2643 +species1_3563 species1_3564 0.0732 0.1679 0.4360 +species1_4847 species1_4848 0.4327 0.6173 0.7009 +species1_3344 species1_3343 0.0642 0.5842 0.1098 +species1_262 species1_261 0.1786 1.5710 0.1137 +species1_3725 species1_3726 0.1883 0.8863 0.2125 +species1_779 species1_780 0.0725 0.2543 0.2849 +species1_1818 species1_1817 0.0681 0.2312 0.2946 +species1_11023 species1_11024 0.1603 0.6655 0.2408 +species1_9653 species1_9654 0.0007 0.7308 0.0010 +species1_6805 species1_6806 0.1090 0.9048 0.1205 +species1_10221 species1_10222 0.0522 0.1314 0.3975 +species1_6011 species1_6012 0.0099 0.9179 0.0108 +species1_5208 species1_5207 0.0931 0.6077 0.1532 +species1_10692 species1_10691 0.0537 0.1868 0.2876 +species1_10036 species1_10035 0.2450 0.4664 0.5254 +species1_1382 species1_1381 0.0682 0.1241 0.5495 +species1_1350 species1_1349 0.0129 0.4414 0.0293 +species1_6889 species1_6890 0.0195 0.7789 0.0251 +species1_4426 species1_4425 0.0399 0.3048 0.1309 +species1_7405 species1_7406 0.0443 1.3004 0.0341 +species1_11014 species1_11013 0.0643 0.9027 0.0712 +species1_8683 species1_8684 0.1321 0.6680 0.1977 +species1_5287 species1_5288 0.0882 0.3480 0.2534 +species1_11671 species1_11672 0.1101 0.2895 0.3802 +species1_551 species1_552 0.0165 0.2045 0.0809 +species1_3396 species1_3395 0.3309 1.6105 0.2055 +species1_5398 species1_5397 0.0004 0.4077 0.0010 +species1_7959 species1_7960 0.0498 0.4343 0.1146 +species1_11030 species1_11029 0.0999 0.2613 0.3825 +species1_10250 species1_10249 0.0486 0.1441 0.3372 +species1_6254 species1_6253 0.6399 0.8894 0.7195 +species1_5438 species1_5437 0.0613 0.2224 0.2757 +species1_5728 species1_5727 0.0788 0.1384 0.5697 +species1_8814 species1_8813 0.1076 1.8253 0.0590 +species1_6351 species1_6352 0.2963 0.8001 0.3704 +species1_11253 species1_11254 0.1359 0.7304 0.1860 +species1_11373 species1_11374 0.0924 0.1261 0.7325 +species1_4903 species1_4904 0.1021 1.3915 0.0734 +species1_4326 species1_4325 0.0275 0.1845 0.1492 +species1_9047 species1_9048 0.0648 2.3728 0.0273 +species1_7487 species1_7488 0.0488 0.2406 0.2029 +species1_1125 species1_1126 0.0813 0.3900 0.2085 +species1_9140 species1_9139 0.1643 0.7126 0.2306 +species1_3355 species1_3356 0.0262 0.9844 0.0266 +species1_7068 species1_7067 0.0368 0.1055 0.3492 +species1_8341 species1_8342 0.0356 0.6112 0.0583 +species1_4871 species1_4872 0.0728 0.2256 0.3227 +species1_4408 species1_4407 0.1148 0.7863 0.1460 +species1_10007 species1_10008 0.1071 0.5462 0.1960 +species1_236 species1_235 0.0834 0.2601 0.3207 +species1_4571 species1_4572 0.0745 1.0106 0.0737 +species1_897 species1_898 0.0916 0.9281 0.0987 +species1_5161 species1_5162 0.0707 0.4099 0.1725 +species1_6899 species1_6900 0.0814 0.6724 0.1210 +species1_5749 species1_5750 0.0325 0.3209 0.1013 +species1_2843 species1_2844 0.0327 0.8263 0.0396 +species1_653 species1_654 0.0382 0.8706 0.0439 +species1_8698 species1_8697 0.2628 2.2612 0.1162 +species1_405 species1_406 0.1135 0.1936 0.5861 +species1_4782 species1_4781 0.0341 1.1289 0.0302 +species1_42 species1_41 0.1711 0.4618 0.3706 +species1_1330 species1_1329 0.1074 0.7932 0.1354 +species1_11127 species1_11128 0.1866 1.6162 0.1155 +species1_7639 species1_7640 0.0787 0.4300 0.1830 +species1_7061 species1_7062 0.0571 0.3851 0.1483 +species1_56 species1_55 0.0260 0.6935 0.0375 +species1_417 species1_418 0.0272 0.8148 0.0334
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.pdf Fri Mar 17 07:47:15 2017 -0400 @@ -0,0 +1,67 @@ +%PDF-1.4 +%ρ\r +1 0 obj +<< +/Title (R Graphics Output) +/Creator (R) +>> +endobj +2 0 obj +<< /Type /Catalog /Pages 3 0 R >> +endobj +7 0 obj +<< /Type /Page /Parent 3 0 R /Contents 8 0 R /Resources 4 0 R >> +endobj +8 0 obj +<< +/Length 1245 /Filter /FlateDecode +>> +stream +endobj +3 0 obj +<< /Type /Pages /Kids [ 7 0 R ] /Count 1 /MediaBox [0 0 504 504] >> +endobj +4 0 obj +<< +/ProcSet [/PDF /Text] +/Font <</F2 10 0 R >> +/ExtGState << >> +/ColorSpace << /sRGB 5 0 R >> +>> +endobj +5 0 obj +[/ICCBased 6 0 R] +endobj +6 0 obj +<< /Alternate /DeviceRGB /N 3 /Length 2596 /Filter /FlateDecode >> +stream +9 0 obj +<< +/Type /Encoding /BaseEncoding /WinAnsiEncoding +/Differences [ 45/minus 96/quoteleft +144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent +/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space] +>> +endobj +10 0 obj +<< /Type /Font /Subtype /Type1 /Name /F2 /BaseFont /Helvetica +/Encoding 9 0 R >> +endobj +xref +0 11 +0000000000 65535 f +0000000021 00000 n +0000000163 00000 n +0000001609 00000 n +0000001692 00000 n +0000001804 00000 n +0000001837 00000 n +0000000212 00000 n +0000000292 00000 n +0000004532 00000 n +0000004789 00000 n +trailer +<< /Size 11 /Info 1 0 R /Root 2 0 R >> +startxref +4886 +%%EOF