Mercurial > repos > greg > ks_distribution
changeset 0:5ace8af6edb6 draft
Uploaded
author | greg |
---|---|
date | Mon, 01 May 2017 13:47:20 -0400 |
parents | |
children | 7c0f41432772 |
files | .shed.yml ks_distribution.R ks_distribution.xml macros.xml test-data/components.tabular test-data/output.pdf test-data/rates.tabular |
diffstat | 7 files changed, 534 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Mon May 01 13:47:20 2017 -0400 @@ -0,0 +1,11 @@ +name: plant_tribes_ks_distribution +owner: greg +description: | + Contains a tool that plots the distribution of synonymous substitution (Ks) rates and fits significant component(s). +homepage_url: https://github.com/dePamphilis/PlantTribes +long_description: | + Contains a tool that plots the distribution of synonymous substitution (Ks) rates and fits significant component(s). +remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/plant_tribes/ks_distribution +type: unrestricted +categories: +- Phylogenetics
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ks_distribution.R Mon May 01 13:47:20 2017 -0400 @@ -0,0 +1,115 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) + +option_list <- list( + make_option(c("-c", "--components_input"), action="store", dest="components_input", help="Ks significant components input dataset"), + make_option(c("-k", "--kaks_input"), action="store", dest="kaks_input", help="KaKs analysis input dataset"), + make_option(c("-o", "--output"), action="store", dest="output", help="Output dataset") +) + +parser <- OptionParser(usage="%prog [options] file", option_list=option_list) +args <- parse_args(parser, positional_arguments=TRUE) +opt <- args$options + + +get_num_components = function(components_data) +{ + # Get the max of the number_comp column. + number_comp = components_data[, 3] + num_components <- max(number_comp, na.rm=TRUE); + num_components +} + +get_pi_mu_var = function(components_data, num_components) +{ + # FixMe: enhance this to generically handle any integer value for num_components. + if (num_components == 1) + { + pi <- c(components_data[1, 9]); + mu <- c(components_data[1, 7]); + var <- c(components_data[1, 8]); + } + else if (num_components == 2) + { + pi <- c(components_data[2, 9], components_data[3, 9]); + mu <- c(components_data[2, 7], components_data[3, 7]); + var <- c(components_data[2, 8], components_data[3, 8]); + } + else if (num_components == 3) + { + pi <- c(components_data[4, 9], components_data[5, 9], components_data[6, 9]); + mu <- c(components_data[4, 7], components_data[5, 7], components_data[6, 7]); + var <- c(components_data[4, 8], components_data[5, 8], components_data[6, 8]); + } + else if (num_components == 4) + { + pi <- c(components_data[7, 9], components_data[8, 9], components_data[9, 9], components_data[10, 9]); + mu <- c(components_data[7, 7], components_data[8, 7], components_data[9, 7], components_data[10, 7]); + var <- c(components_data[7, 8], components_data[8, 8], components_data[9, 8], components_data[10, 8]); + } + return = c(pi, mu, var) + return +} + +plot_ks<-function(kaks_input, output, pi, mu, var) +{ + # Start PDF device driver to save charts to output. + pdf(file=output, bg="white") + # Change bin width + bin <- 0.05 * seq(0, 40); + kaks <- read.table(file=kaks_input, header=T); + kaks <- kaks[kaks$Ks<2,]; + h.kst <- hist(kaks$Ks, breaks=bin, plot=F); + nc <- h.kst$counts; + vx <- h.kst$mids; + ntot <- sum(nc); + # Set margin for plot bottom, left top, right. + par(mai=c(0.5, 0.5, 0, 0)); + # Plot dimension in inches. + par(pin=c(2.5, 2.5)); + g <- calculate_fitted_density(pi, mu, var); + h <- ntot * 2.5 / sum(g); + vx <- seq(1, 100) * 0.02; + ymax <- max(nc) + 5; + barplot(nc, space=0.25, offset=0, width=0.04, xlim=c(0,2), ylim=c(0, ymax)); + # Add x-axis. + axis(1); + color <- c('green', 'blue', 'black', 'red'); + for (i in 1:length(mu)) + { + lines(vx, g[,i] * h, lwd=2, col=color[i]); + } +}; + +calculate_fitted_density <- function(pi, mu, var) +{ + comp <- length(pi); + var <- var/mu^2; + mu <- log(mu); + #calculate lognormal density + vx <- seq(1, 100) * 0.02; + fx <- matrix(0, 100, comp); + for (i in 1:100) + { + for (j in 1:comp) + { + fx[i, j] <- pi[j] * dlnorm(vx[i], meanlog=mu[j], sdlog=(sqrt(var[j]))); + }; + }; + fx; +} + +# Read in the components data. +components_data <- read.delim(opt$components_input, header=TRUE); +# Get the number of components. +num_components <- get_num_components(components_data) + +# Set pi, mu, var. +items <- get_pi_mu_var(components_data, num_components); +pi <- items[1]; +mu <- items[2]; +var <- items[3]; + +# Plot the output. +plot_ks(opt$kaks_input, opt$output, pi, mu, var);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ks_distribution.xml Mon May 01 13:47:20 2017 -0400 @@ -0,0 +1,110 @@ +<tool id="ks_distribution" name="KsDistribution" version="1.0.0"> + <description>plots the distribution of synonymous substitution (Ks) rates and fits significant componets(s)</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements_ks_distribution" /> + <expand macro="stdio" /> + <command> + <![CDATA[ + Rscript $__tool_directory__/ks_distribution.R + -k '${kaks}' + -c '${components}' + -o '$output' + ]]> + </command> + <inputs> + <param name="rates" format="tabular" type="data" label="Synonymous substitution rates" /> + <param name="components" format="tabular" type="data" label="Significant components" /> + </inputs> + <outputs> + <data name="output" format="pdf"/> + </outputs> + <tests> + <test> + <param name="rates" value="rates.tabular" ftype="tabular" /> + <param name="components" value="components.tabular" ftype="tabular" /> + <output name="output" file="output.pdf" ftype="pdf" compare="contains" /> + </test> + </tests> + <help> +**What it does** + +This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary analyses of +genome-scale gene families and transcriptomes. This tool uses the analysis results produced by the KaKsAnalysis tool to plot the +distribution of synonymous substitution (Ks) rates and fit the estimated significant normal mixtures component(s) onto the distribution. + +----- + +**Options** + + * **Required** + + - **Synonymous substitution rates** - estimated synonymous substitution (Ks) rates output file produced by the KaKsAnalysis tool selected from your history. + - **Synonymous components** - estimated significant component(s) output file produced by the KaKsAnalysis tool selected from your history. + </help> + <citations> + <expand macro="citation1" /> + <citation type="bibtex"> + @article{Wall2008, + journal = {Nucleic Acids Research}, + author = {2. Wall PK, Leebens-Mack J, Muller KF, Field D, Altman NS}, + title = {PlantTribes: a gene and gene family resource for comparative genomics in plants}, + year = {2008}, + volume = {36}, + number = {suppl 1}, + pages = {D970-D976},} + </citation> + <citation type="bibtex"> + @article{Altschul1990, + journal = {Journal of molecular biology} + author = {3. Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ}, + title = {Basic local alignment search tool}, + year = {1990}, + volume = {215}, + number = {3}, + pages = {403-410},} + </citation> + <citation type="bibtex"> + @article{Katoh2013, + journal = {Molecular biology and evolution}, + author = {4. Katoh K, Standley DM}, + title = {MAFFT multiple sequence alignment software version 7: improvements in performance and usability}, + year = {2013}, + volume = {30}, + number = {4}, + pages = {772-780},} + </citation> + <citation type="bibtex"> + @article{Yang2007, + journal = {Molecular biology and evolution}, + author = {5. Yang Z}, + title = {PAML 4: phylogenetic analysis by maximum likelihood}, + year = {2007}, + volume = {24}, + number = {8}, + pages = {1586-1591},} + </citation> + <citation type="bibtex"> + @article{Cui2006, + journal = {Genome Research}, + author = {6. Cui L, Wall PK, Leebens-Mack JH, Lindsay BG, Soltis DE, Doyle JJ, Soltis PS, Carlson JE, Arumuganathan K, Barakat A, Albert VA}, + title = {Widespread genome duplications throughout the history of flowering plants}, + year = {2006}, + volume = {16}, + number = {6}, + pages = {738-749},} + </citation> + <citation type="bibtex"> + @article{McLachlan1999, + journal = {Journal of Statistical Software}, + author = {7. McLachlan GJ, Peel D, Basford KE, Adams P}, + title = {The EMMIX software for the fitting of mixtures of normal and t-components}, + year = {1999}, + volume = {4}, + number = {2}, + pages = {1-14},} + </citation> + </citations> +</tool> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon May 01 13:47:20 2017 -0400 @@ -0,0 +1,138 @@ +<?xml version='1.0' encoding='UTF-8'?> +<macros> + <token name="@WRAPPER_VERSION@">0.8</token> + <xml name="requirements_assembly_post_processor"> + <requirements> + <requirement type="package" version="0.8">plant_tribes_assembly_post_processor</requirement> + </requirements> + </xml> + <xml name="requirements_gene_family_aligner"> + <requirements> + <requirement type="package" version="0.8">plant_tribes_gene_family_aligner</requirement> + </requirements> + </xml> + <xml name="requirements_gene_family_classifier"> + <requirements> + <requirement type="package" version="0.8">plant_tribes_gene_family_classifier</requirement> + </requirements> + </xml> + <xml name="requirements_gene_family_integrator"> + <requirements> + <requirement type="package" version="0.8">plant_tribes_gene_family_integrator</requirement> + </requirements> + </xml> + <xml name="requirements_kaks_analysis"> + <requirements> + <requirement type="package" version="0.8">plant_tribes_kaks_analysis</requirement> + </requirements> + </xml> + <xml name="requirements_ks_distribution"> + <requirements> + <requirement type="package" version="1.3.0">r-optparse</requirement> + </requirements> + </xml> + <xml name="requirements_gene_family_phylogeny_builder"> + <requirements> + <requirement type="package" version="0.8">plant_tribes_gene_family_phylogeny_builder</requirement> + </requirements> + </xml> + <xml name="stdio"> + <stdio> + <exit_code range="1:"/> + <exit_code range=":-1"/> + <regex match="Error:"/> + <regex match="Exception:"/> + </stdio> + </xml> + <xml name="param_codon_alignments"> + <param name="codon_alignments" type="select" label="Codon alignments"> + <option value="yes" selected="true">Yes</option> + <option value="no">No</option> + </param> + </xml> + <xml name="param_method"> + <param name="method" type="select" label="Protein clustering method"> + <option value="gfam" selected="true">GFam</option> + <option value="orthofinder">OrthoFinder</option> + <option value="orthomcl">OrthoMCL</option> + </param> + </xml> + <xml name="param_options_type"> + <param name="options_type" type="select" label="Options Configuration"> + <option value="basic" selected="true">Basic</option> + <option value="advanced">Advanced</option> + </param> + </xml> + <xml name="param_orthogroup_fna"> + <param name="orthogroup_fna" type="select" label="Orthogroups coding sequences"> + <option value="yes" selected="true">Yes</option> + <option value="no">No</option> + </param> + </xml> + <xml name="param_scaffold"> + <param name="scaffold" type="select" label="Gene family scaffold"> + <options from_data_table="plant_tribes_scaffolds" /> + <validator type="no_options" message="No PlantTribes scaffolds are available. Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." /> + </param> + </xml> + <xml name="param_sequence_type"> + <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference (dna)"> + <option value="protein" selected="true">Amino acid based</option> + <option value="dna">Nucleotide based</option> + </param> + </xml> + <xml name="cond_alignment_method"> + <conditional name="alignment_method_cond"> + <param name="alignment_method" type="select" force_select="true" label="Multiple sequence alignment method"> + <option value="mafft" selected="true">MAFFT</option> + <option value="pasta">PASTA</option> + </param> + <when value="mafft" /> + <when value="pasta"> + <param name="pasta_iter_limit" type="integer" value="3" min="1" label="PASTA iteration limit" /> + </when> + </conditional> + </xml> + <xml name="cond_remove_gappy_sequences"> + <conditional name="remove_gappy_sequences_cond"> + <param name="remove_gappy_sequences" type="select" label="Alignment post-processing configuration"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> + </param> + <when value="no" /> + <when value="yes"> + <conditional name="trim_type_cond"> + <param name="trim_type" type="select" label="Trimming method"> + <option value="gap_trimming" selected="true">Gap score based trimming</option> + <option value="automated_trimming">Automated heuristic trimming</option> + </param> + <when value="gap_trimming"> + <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Gap score" /> + </when> + <when value="automated_trimming" /> + </conditional> + <conditional name="remove_sequences_with_gaps_cond"> + <param name="remove_sequences_with_gaps" type="select" label="Remove sequences"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> + </param> + <when value="no" /> + <when value="yes"> + <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Coverage score" /> + <param name="iterative_realignment" type="integer" optional="true" min="0" label="Realignment iteration limit" /> + </when> + </conditional> + </when> + </conditional> + </xml> + <xml name="citation1"> + <citation type="bibtex"> + @misc{None, + journal = {None}, + author = {1. Wafula EK}, + title = {Manuscript in preparation}, + year = {None}, + url = {https://github.com/dePamphilis/PlantTribes},} + </citation> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/components.tabular Mon May 01 13:47:20 2017 -0400 @@ -0,0 +1,7 @@ +species n number_comp lnL AIC BIC mean variance porportion +species1 1160 1 -1404.9900 2813.98 2824.09 0.4426 0.1293 1.00 +species1 1160 2 -1353.6550 2717.31 2742.59 0.7376 0.1672 0.61 + 0.2000 0.0069 0.39 +species1 1160 3 -1323.7480 2663.50 2703.94 0.1214 0.0002 0.10 + 0.7759 0.1663 0.57 + 0.2428 0.0070 0.33
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.pdf Mon May 01 13:47:20 2017 -0400 @@ -0,0 +1,52 @@ +1 0 obj +<< +/Title (R Graphics Output) +/Creator (R) +>> +endobj +2 0 obj +<< /Type /Catalog /Pages 3 0 R >> +endobj +7 0 obj +<< /Type /Page /Parent 3 0 R /Contents 8 0 R /Resources 4 0 R >> +endobj +8 0 obj +<< +>> +stream +endobj +3 0 obj +<< /Type /Pages /Kids [ 7 0 R ] /Count 1 /MediaBox [0 0 504 504] >> +endobj +4 0 obj +<< +/ProcSet [/PDF /Text] +/Font <</F2 10 0 R >> +/ExtGState << >> +/ColorSpace << /sRGB 5 0 R >> +>> +endobj +5 0 obj +[/ICCBased 6 0 R] +endobj +6 0 obj +<< /Alternate /DeviceRGB /N 3 /Length 2596 /Filter /FlateDecode >> +stream +9 0 obj +<< +/Type /Encoding /BaseEncoding /WinAnsiEncoding +/Differences [ 45/minus 96/quoteleft +144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent +/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space] +>> +endobj +10 0 obj +<< /Type /Font /Subtype /Type1 /Name /F2 /BaseFont /Helvetica +/Encoding 9 0 R >> +endobj +xref +0 11 +trailer +<< /Size 11 /Info 1 0 R /Root 2 0 R >> +startxref +%%EOF
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rates.tabular Mon May 01 13:47:20 2017 -0400 @@ -0,0 +1,101 @@ +SEQ1 SEQ2 Ka Ks Ka\Ks +species1_10079 species1_10080 0.1194 0.9782 0.1221 +species1_10818 species1_10817 0.0194 0.9541 0.0203 +species1_4505 species1_4506 0.0379 0.2411 0.1571 +species1_2890 species1_2889 0.0212 0.8520 0.0249 +species1_3386 species1_3385 0.0002 0.1698 0.0010 +species1_2266 species1_2265 0.0881 0.1515 0.5814 +species1_7052 species1_7051 0.0514 0.8172 0.0630 +species1_3374 species1_3373 0.0034 0.4496 0.0075 +species1_688 species1_687 0.1420 0.2970 0.4782 +species1_10669 species1_10670 0.0003 0.3216 0.0010 +species1_9810 species1_9809 0.1490 0.7782 0.1914 +species1_11440 species1_11439 0.0083 0.2177 0.0380 +species1_2713 species1_2714 0.1227 0.2133 0.5755 +species1_4698 species1_4697 0.1021 0.2207 0.4624 +species1_10939 species1_10940 0.0861 0.7844 0.1098 +species1_22 species1_21 0.1368 0.2515 0.5437 +species1_7149 species1_7150 0.0817 0.9899 0.0825 +species1_5139 species1_5140 0.1167 0.2669 0.4373 +species1_3430 species1_3429 0.0803 0.9931 0.0808 +species1_10208 species1_10207 0.0764 0.1098 0.6957 +species1_8410 species1_8409 0.0165 0.8114 0.0204 +species1_2452 species1_2451 0.0629 0.4523 0.1391 +species1_289 species1_290 0.0560 0.1590 0.3526 +species1_10028 species1_10027 0.0927 1.3078 0.0708 +species1_8242 species1_8241 0.0037 0.1390 0.0264 +species1_5661 species1_5662 0.2040 0.5982 0.3410 +species1_85 species1_86 0.0391 0.1012 0.3866 +species1_1660 species1_1659 0.0408 0.3674 0.1111 +species1_616 species1_615 0.0078 0.1315 0.0595 +species1_8043 species1_8044 0.0637 0.3891 0.1637 +species1_1465 species1_1466 0.1186 0.1725 0.6876 +species1_7761 species1_7762 0.2544 0.3513 0.7242 +species1_299 species1_300 0.1407 0.5325 0.2643 +species1_3563 species1_3564 0.0732 0.1679 0.4360 +species1_4847 species1_4848 0.4327 0.6173 0.7009 +species1_3344 species1_3343 0.0642 0.5842 0.1098 +species1_262 species1_261 0.1786 1.5710 0.1137 +species1_3725 species1_3726 0.1883 0.8863 0.2125 +species1_779 species1_780 0.0725 0.2543 0.2849 +species1_1818 species1_1817 0.0681 0.2312 0.2946 +species1_11023 species1_11024 0.1603 0.6655 0.2408 +species1_9653 species1_9654 0.0007 0.7308 0.0010 +species1_6805 species1_6806 0.1090 0.9048 0.1205 +species1_10221 species1_10222 0.0522 0.1314 0.3975 +species1_6011 species1_6012 0.0099 0.9179 0.0108 +species1_5208 species1_5207 0.0931 0.6077 0.1532 +species1_10692 species1_10691 0.0537 0.1868 0.2876 +species1_10036 species1_10035 0.2450 0.4664 0.5254 +species1_1382 species1_1381 0.0682 0.1241 0.5495 +species1_1350 species1_1349 0.0129 0.4414 0.0293 +species1_6889 species1_6890 0.0195 0.7789 0.0251 +species1_4426 species1_4425 0.0399 0.3048 0.1309 +species1_7405 species1_7406 0.0443 1.3004 0.0341 +species1_11014 species1_11013 0.0643 0.9027 0.0712 +species1_8683 species1_8684 0.1321 0.6680 0.1977 +species1_5287 species1_5288 0.0882 0.3480 0.2534 +species1_11671 species1_11672 0.1101 0.2895 0.3802 +species1_551 species1_552 0.0165 0.2045 0.0809 +species1_3396 species1_3395 0.3309 1.6105 0.2055 +species1_5398 species1_5397 0.0004 0.4077 0.0010 +species1_7959 species1_7960 0.0498 0.4343 0.1146 +species1_11030 species1_11029 0.0999 0.2613 0.3825 +species1_10250 species1_10249 0.0486 0.1441 0.3372 +species1_6254 species1_6253 0.6399 0.8894 0.7195 +species1_5438 species1_5437 0.0613 0.2224 0.2757 +species1_5728 species1_5727 0.0788 0.1384 0.5697 +species1_8814 species1_8813 0.1076 1.8253 0.0590 +species1_6351 species1_6352 0.2963 0.8001 0.3704 +species1_11253 species1_11254 0.1359 0.7304 0.1860 +species1_11373 species1_11374 0.0924 0.1261 0.7325 +species1_4903 species1_4904 0.1021 1.3915 0.0734 +species1_4326 species1_4325 0.0275 0.1845 0.1492 +species1_9047 species1_9048 0.0648 2.3728 0.0273 +species1_7487 species1_7488 0.0488 0.2406 0.2029 +species1_1125 species1_1126 0.0813 0.3900 0.2085 +species1_9140 species1_9139 0.1643 0.7126 0.2306 +species1_3355 species1_3356 0.0262 0.9844 0.0266 +species1_7068 species1_7067 0.0368 0.1055 0.3492 +species1_8341 species1_8342 0.0356 0.6112 0.0583 +species1_4871 species1_4872 0.0728 0.2256 0.3227 +species1_4408 species1_4407 0.1148 0.7863 0.1460 +species1_10007 species1_10008 0.1071 0.5462 0.1960 +species1_236 species1_235 0.0834 0.2601 0.3207 +species1_4571 species1_4572 0.0745 1.0106 0.0737 +species1_897 species1_898 0.0916 0.9281 0.0987 +species1_5161 species1_5162 0.0707 0.4099 0.1725 +species1_6899 species1_6900 0.0814 0.6724 0.1210 +species1_5749 species1_5750 0.0325 0.3209 0.1013 +species1_2843 species1_2844 0.0327 0.8263 0.0396 +species1_653 species1_654 0.0382 0.8706 0.0439 +species1_8698 species1_8697 0.2628 2.2612 0.1162 +species1_405 species1_406 0.1135 0.1936 0.5861 +species1_4782 species1_4781 0.0341 1.1289 0.0302 +species1_42 species1_41 0.1711 0.4618 0.3706 +species1_1330 species1_1329 0.1074 0.7932 0.1354 +species1_11127 species1_11128 0.1866 1.6162 0.1155 +species1_7639 species1_7640 0.0787 0.4300 0.1830 +species1_7061 species1_7062 0.0571 0.3851 0.1483 +species1_56 species1_55 0.0260 0.6935 0.0375 +species1_417 species1_418 0.0272 0.8148 0.0334