# HG changeset patch
# User greg
# Date 1493660840 14400
# Node ID 5ace8af6edb65d6212de7f4839b0b0e87548b39e
Uploaded
diff -r 000000000000 -r 5ace8af6edb6 .shed.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml Mon May 01 13:47:20 2017 -0400
@@ -0,0 +1,11 @@
+name: plant_tribes_ks_distribution
+owner: greg
+description: |
+ Contains a tool that plots the distribution of synonymous substitution (Ks) rates and fits significant component(s).
+homepage_url: https://github.com/dePamphilis/PlantTribes
+long_description: |
+ Contains a tool that plots the distribution of synonymous substitution (Ks) rates and fits significant component(s).
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/plant_tribes/ks_distribution
+type: unrestricted
+categories:
+- Phylogenetics
diff -r 000000000000 -r 5ace8af6edb6 ks_distribution.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ks_distribution.R Mon May 01 13:47:20 2017 -0400
@@ -0,0 +1,115 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+
+option_list <- list(
+ make_option(c("-c", "--components_input"), action="store", dest="components_input", help="Ks significant components input dataset"),
+ make_option(c("-k", "--kaks_input"), action="store", dest="kaks_input", help="KaKs analysis input dataset"),
+ make_option(c("-o", "--output"), action="store", dest="output", help="Output dataset")
+)
+
+parser <- OptionParser(usage="%prog [options] file", option_list=option_list)
+args <- parse_args(parser, positional_arguments=TRUE)
+opt <- args$options
+
+
+get_num_components = function(components_data)
+{
+ # Get the max of the number_comp column.
+ number_comp = components_data[, 3]
+ num_components <- max(number_comp, na.rm=TRUE);
+ num_components
+}
+
+get_pi_mu_var = function(components_data, num_components)
+{
+ # FixMe: enhance this to generically handle any integer value for num_components.
+ if (num_components == 1)
+ {
+ pi <- c(components_data[1, 9]);
+ mu <- c(components_data[1, 7]);
+ var <- c(components_data[1, 8]);
+ }
+ else if (num_components == 2)
+ {
+ pi <- c(components_data[2, 9], components_data[3, 9]);
+ mu <- c(components_data[2, 7], components_data[3, 7]);
+ var <- c(components_data[2, 8], components_data[3, 8]);
+ }
+ else if (num_components == 3)
+ {
+ pi <- c(components_data[4, 9], components_data[5, 9], components_data[6, 9]);
+ mu <- c(components_data[4, 7], components_data[5, 7], components_data[6, 7]);
+ var <- c(components_data[4, 8], components_data[5, 8], components_data[6, 8]);
+ }
+ else if (num_components == 4)
+ {
+ pi <- c(components_data[7, 9], components_data[8, 9], components_data[9, 9], components_data[10, 9]);
+ mu <- c(components_data[7, 7], components_data[8, 7], components_data[9, 7], components_data[10, 7]);
+ var <- c(components_data[7, 8], components_data[8, 8], components_data[9, 8], components_data[10, 8]);
+ }
+ return = c(pi, mu, var)
+ return
+}
+
+plot_ks<-function(kaks_input, output, pi, mu, var)
+{
+ # Start PDF device driver to save charts to output.
+ pdf(file=output, bg="white")
+ # Change bin width
+ bin <- 0.05 * seq(0, 40);
+ kaks <- read.table(file=kaks_input, header=T);
+ kaks <- kaks[kaks$Ks<2,];
+ h.kst <- hist(kaks$Ks, breaks=bin, plot=F);
+ nc <- h.kst$counts;
+ vx <- h.kst$mids;
+ ntot <- sum(nc);
+ # Set margin for plot bottom, left top, right.
+ par(mai=c(0.5, 0.5, 0, 0));
+ # Plot dimension in inches.
+ par(pin=c(2.5, 2.5));
+ g <- calculate_fitted_density(pi, mu, var);
+ h <- ntot * 2.5 / sum(g);
+ vx <- seq(1, 100) * 0.02;
+ ymax <- max(nc) + 5;
+ barplot(nc, space=0.25, offset=0, width=0.04, xlim=c(0,2), ylim=c(0, ymax));
+ # Add x-axis.
+ axis(1);
+ color <- c('green', 'blue', 'black', 'red');
+ for (i in 1:length(mu))
+ {
+ lines(vx, g[,i] * h, lwd=2, col=color[i]);
+ }
+};
+
+calculate_fitted_density <- function(pi, mu, var)
+{
+ comp <- length(pi);
+ var <- var/mu^2;
+ mu <- log(mu);
+ #calculate lognormal density
+ vx <- seq(1, 100) * 0.02;
+ fx <- matrix(0, 100, comp);
+ for (i in 1:100)
+ {
+ for (j in 1:comp)
+ {
+ fx[i, j] <- pi[j] * dlnorm(vx[i], meanlog=mu[j], sdlog=(sqrt(var[j])));
+ };
+ };
+ fx;
+}
+
+# Read in the components data.
+components_data <- read.delim(opt$components_input, header=TRUE);
+# Get the number of components.
+num_components <- get_num_components(components_data)
+
+# Set pi, mu, var.
+items <- get_pi_mu_var(components_data, num_components);
+pi <- items[1];
+mu <- items[2];
+var <- items[3];
+
+# Plot the output.
+plot_ks(opt$kaks_input, opt$output, pi, mu, var);
diff -r 000000000000 -r 5ace8af6edb6 ks_distribution.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ks_distribution.xml Mon May 01 13:47:20 2017 -0400
@@ -0,0 +1,110 @@
+
+ plots the distribution of synonymous substitution (Ks) rates and fits significant componets(s)
+
+ macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary analyses of
+genome-scale gene families and transcriptomes. This tool uses the analysis results produced by the KaKsAnalysis tool to plot the
+distribution of synonymous substitution (Ks) rates and fit the estimated significant normal mixtures component(s) onto the distribution.
+
+-----
+
+**Options**
+
+ * **Required**
+
+ - **Synonymous substitution rates** - estimated synonymous substitution (Ks) rates output file produced by the KaKsAnalysis tool selected from your history.
+ - **Synonymous components** - estimated significant component(s) output file produced by the KaKsAnalysis tool selected from your history.
+
+
+
+
+ @article{Wall2008,
+ journal = {Nucleic Acids Research},
+ author = {2. Wall PK, Leebens-Mack J, Muller KF, Field D, Altman NS},
+ title = {PlantTribes: a gene and gene family resource for comparative genomics in plants},
+ year = {2008},
+ volume = {36},
+ number = {suppl 1},
+ pages = {D970-D976},}
+
+
+ @article{Altschul1990,
+ journal = {Journal of molecular biology}
+ author = {3. Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ},
+ title = {Basic local alignment search tool},
+ year = {1990},
+ volume = {215},
+ number = {3},
+ pages = {403-410},}
+
+
+ @article{Katoh2013,
+ journal = {Molecular biology and evolution},
+ author = {4. Katoh K, Standley DM},
+ title = {MAFFT multiple sequence alignment software version 7: improvements in performance and usability},
+ year = {2013},
+ volume = {30},
+ number = {4},
+ pages = {772-780},}
+
+
+ @article{Yang2007,
+ journal = {Molecular biology and evolution},
+ author = {5. Yang Z},
+ title = {PAML 4: phylogenetic analysis by maximum likelihood},
+ year = {2007},
+ volume = {24},
+ number = {8},
+ pages = {1586-1591},}
+
+
+ @article{Cui2006,
+ journal = {Genome Research},
+ author = {6. Cui L, Wall PK, Leebens-Mack JH, Lindsay BG, Soltis DE, Doyle JJ, Soltis PS, Carlson JE, Arumuganathan K, Barakat A, Albert VA},
+ title = {Widespread genome duplications throughout the history of flowering plants},
+ year = {2006},
+ volume = {16},
+ number = {6},
+ pages = {738-749},}
+
+
+ @article{McLachlan1999,
+ journal = {Journal of Statistical Software},
+ author = {7. McLachlan GJ, Peel D, Basford KE, Adams P},
+ title = {The EMMIX software for the fitting of mixtures of normal and t-components},
+ year = {1999},
+ volume = {4},
+ number = {2},
+ pages = {1-14},}
+
+
+
+
diff -r 000000000000 -r 5ace8af6edb6 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon May 01 13:47:20 2017 -0400
@@ -0,0 +1,138 @@
+
+
+ 0.8
+
+
+ plant_tribes_assembly_post_processor
+
+
+
+
+ plant_tribes_gene_family_aligner
+
+
+
+
+ plant_tribes_gene_family_classifier
+
+
+
+
+ plant_tribes_gene_family_integrator
+
+
+
+
+ plant_tribes_kaks_analysis
+
+
+
+
+ r-optparse
+
+
+
+
+ plant_tribes_gene_family_phylogeny_builder
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @misc{None,
+ journal = {None},
+ author = {1. Wafula EK},
+ title = {Manuscript in preparation},
+ year = {None},
+ url = {https://github.com/dePamphilis/PlantTribes},}
+
+
+
diff -r 000000000000 -r 5ace8af6edb6 test-data/components.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/components.tabular Mon May 01 13:47:20 2017 -0400
@@ -0,0 +1,7 @@
+species n number_comp lnL AIC BIC mean variance porportion
+species1 1160 1 -1404.9900 2813.98 2824.09 0.4426 0.1293 1.00
+species1 1160 2 -1353.6550 2717.31 2742.59 0.7376 0.1672 0.61
+ 0.2000 0.0069 0.39
+species1 1160 3 -1323.7480 2663.50 2703.94 0.1214 0.0002 0.10
+ 0.7759 0.1663 0.57
+ 0.2428 0.0070 0.33
diff -r 000000000000 -r 5ace8af6edb6 test-data/output.pdf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.pdf Mon May 01 13:47:20 2017 -0400
@@ -0,0 +1,52 @@
+1 0 obj
+<<
+/Title (R Graphics Output)
+/Creator (R)
+>>
+endobj
+2 0 obj
+<< /Type /Catalog /Pages 3 0 R >>
+endobj
+7 0 obj
+<< /Type /Page /Parent 3 0 R /Contents 8 0 R /Resources 4 0 R >>
+endobj
+8 0 obj
+<<
+>>
+stream
+endobj
+3 0 obj
+<< /Type /Pages /Kids [ 7 0 R ] /Count 1 /MediaBox [0 0 504 504] >>
+endobj
+4 0 obj
+<<
+/ProcSet [/PDF /Text]
+/Font <>
+/ExtGState << >>
+/ColorSpace << /sRGB 5 0 R >>
+>>
+endobj
+5 0 obj
+[/ICCBased 6 0 R]
+endobj
+6 0 obj
+<< /Alternate /DeviceRGB /N 3 /Length 2596 /Filter /FlateDecode >>
+stream
+9 0 obj
+<<
+/Type /Encoding /BaseEncoding /WinAnsiEncoding
+/Differences [ 45/minus 96/quoteleft
+144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent
+/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space]
+>>
+endobj
+10 0 obj
+<< /Type /Font /Subtype /Type1 /Name /F2 /BaseFont /Helvetica
+/Encoding 9 0 R >>
+endobj
+xref
+0 11
+trailer
+<< /Size 11 /Info 1 0 R /Root 2 0 R >>
+startxref
+%%EOF
diff -r 000000000000 -r 5ace8af6edb6 test-data/rates.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rates.tabular Mon May 01 13:47:20 2017 -0400
@@ -0,0 +1,101 @@
+SEQ1 SEQ2 Ka Ks Ka\Ks
+species1_10079 species1_10080 0.1194 0.9782 0.1221
+species1_10818 species1_10817 0.0194 0.9541 0.0203
+species1_4505 species1_4506 0.0379 0.2411 0.1571
+species1_2890 species1_2889 0.0212 0.8520 0.0249
+species1_3386 species1_3385 0.0002 0.1698 0.0010
+species1_2266 species1_2265 0.0881 0.1515 0.5814
+species1_7052 species1_7051 0.0514 0.8172 0.0630
+species1_3374 species1_3373 0.0034 0.4496 0.0075
+species1_688 species1_687 0.1420 0.2970 0.4782
+species1_10669 species1_10670 0.0003 0.3216 0.0010
+species1_9810 species1_9809 0.1490 0.7782 0.1914
+species1_11440 species1_11439 0.0083 0.2177 0.0380
+species1_2713 species1_2714 0.1227 0.2133 0.5755
+species1_4698 species1_4697 0.1021 0.2207 0.4624
+species1_10939 species1_10940 0.0861 0.7844 0.1098
+species1_22 species1_21 0.1368 0.2515 0.5437
+species1_7149 species1_7150 0.0817 0.9899 0.0825
+species1_5139 species1_5140 0.1167 0.2669 0.4373
+species1_3430 species1_3429 0.0803 0.9931 0.0808
+species1_10208 species1_10207 0.0764 0.1098 0.6957
+species1_8410 species1_8409 0.0165 0.8114 0.0204
+species1_2452 species1_2451 0.0629 0.4523 0.1391
+species1_289 species1_290 0.0560 0.1590 0.3526
+species1_10028 species1_10027 0.0927 1.3078 0.0708
+species1_8242 species1_8241 0.0037 0.1390 0.0264
+species1_5661 species1_5662 0.2040 0.5982 0.3410
+species1_85 species1_86 0.0391 0.1012 0.3866
+species1_1660 species1_1659 0.0408 0.3674 0.1111
+species1_616 species1_615 0.0078 0.1315 0.0595
+species1_8043 species1_8044 0.0637 0.3891 0.1637
+species1_1465 species1_1466 0.1186 0.1725 0.6876
+species1_7761 species1_7762 0.2544 0.3513 0.7242
+species1_299 species1_300 0.1407 0.5325 0.2643
+species1_3563 species1_3564 0.0732 0.1679 0.4360
+species1_4847 species1_4848 0.4327 0.6173 0.7009
+species1_3344 species1_3343 0.0642 0.5842 0.1098
+species1_262 species1_261 0.1786 1.5710 0.1137
+species1_3725 species1_3726 0.1883 0.8863 0.2125
+species1_779 species1_780 0.0725 0.2543 0.2849
+species1_1818 species1_1817 0.0681 0.2312 0.2946
+species1_11023 species1_11024 0.1603 0.6655 0.2408
+species1_9653 species1_9654 0.0007 0.7308 0.0010
+species1_6805 species1_6806 0.1090 0.9048 0.1205
+species1_10221 species1_10222 0.0522 0.1314 0.3975
+species1_6011 species1_6012 0.0099 0.9179 0.0108
+species1_5208 species1_5207 0.0931 0.6077 0.1532
+species1_10692 species1_10691 0.0537 0.1868 0.2876
+species1_10036 species1_10035 0.2450 0.4664 0.5254
+species1_1382 species1_1381 0.0682 0.1241 0.5495
+species1_1350 species1_1349 0.0129 0.4414 0.0293
+species1_6889 species1_6890 0.0195 0.7789 0.0251
+species1_4426 species1_4425 0.0399 0.3048 0.1309
+species1_7405 species1_7406 0.0443 1.3004 0.0341
+species1_11014 species1_11013 0.0643 0.9027 0.0712
+species1_8683 species1_8684 0.1321 0.6680 0.1977
+species1_5287 species1_5288 0.0882 0.3480 0.2534
+species1_11671 species1_11672 0.1101 0.2895 0.3802
+species1_551 species1_552 0.0165 0.2045 0.0809
+species1_3396 species1_3395 0.3309 1.6105 0.2055
+species1_5398 species1_5397 0.0004 0.4077 0.0010
+species1_7959 species1_7960 0.0498 0.4343 0.1146
+species1_11030 species1_11029 0.0999 0.2613 0.3825
+species1_10250 species1_10249 0.0486 0.1441 0.3372
+species1_6254 species1_6253 0.6399 0.8894 0.7195
+species1_5438 species1_5437 0.0613 0.2224 0.2757
+species1_5728 species1_5727 0.0788 0.1384 0.5697
+species1_8814 species1_8813 0.1076 1.8253 0.0590
+species1_6351 species1_6352 0.2963 0.8001 0.3704
+species1_11253 species1_11254 0.1359 0.7304 0.1860
+species1_11373 species1_11374 0.0924 0.1261 0.7325
+species1_4903 species1_4904 0.1021 1.3915 0.0734
+species1_4326 species1_4325 0.0275 0.1845 0.1492
+species1_9047 species1_9048 0.0648 2.3728 0.0273
+species1_7487 species1_7488 0.0488 0.2406 0.2029
+species1_1125 species1_1126 0.0813 0.3900 0.2085
+species1_9140 species1_9139 0.1643 0.7126 0.2306
+species1_3355 species1_3356 0.0262 0.9844 0.0266
+species1_7068 species1_7067 0.0368 0.1055 0.3492
+species1_8341 species1_8342 0.0356 0.6112 0.0583
+species1_4871 species1_4872 0.0728 0.2256 0.3227
+species1_4408 species1_4407 0.1148 0.7863 0.1460
+species1_10007 species1_10008 0.1071 0.5462 0.1960
+species1_236 species1_235 0.0834 0.2601 0.3207
+species1_4571 species1_4572 0.0745 1.0106 0.0737
+species1_897 species1_898 0.0916 0.9281 0.0987
+species1_5161 species1_5162 0.0707 0.4099 0.1725
+species1_6899 species1_6900 0.0814 0.6724 0.1210
+species1_5749 species1_5750 0.0325 0.3209 0.1013
+species1_2843 species1_2844 0.0327 0.8263 0.0396
+species1_653 species1_654 0.0382 0.8706 0.0439
+species1_8698 species1_8697 0.2628 2.2612 0.1162
+species1_405 species1_406 0.1135 0.1936 0.5861
+species1_4782 species1_4781 0.0341 1.1289 0.0302
+species1_42 species1_41 0.1711 0.4618 0.3706
+species1_1330 species1_1329 0.1074 0.7932 0.1354
+species1_11127 species1_11128 0.1866 1.6162 0.1155
+species1_7639 species1_7640 0.0787 0.4300 0.1830
+species1_7061 species1_7062 0.0571 0.3851 0.1483
+species1_56 species1_55 0.0260 0.6935 0.0375
+species1_417 species1_418 0.0272 0.8148 0.0334