# HG changeset patch # User greg # Date 1493385656 14400 # Node ID 4a0837f2b9956c8875f26c375e76386fda53baf9 # Parent 5a5f80ea6306be99e6ae8f624c7ad02d21764d84 Uploaded diff -r 5a5f80ea6306 -r 4a0837f2b995 gene_family_aligner.xml --- a/gene_family_aligner.xml Tue Apr 11 14:22:26 2017 -0400 +++ b/gene_family_aligner.xml Fri Apr 28 09:20:56 2017 -0400 @@ -1,5 +1,5 @@ - aligns gene family sequences + aligns integrated orthologous gene family clusters macros.xml @@ -82,18 +82,18 @@ - - - + + + - + - + @@ -130,40 +130,86 @@ This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary -analyses of genome-scale gene families and transcriptomes. This tool aligns gene family sequences. +analyses of genome-scale gene families and transcriptomes. This tool estimates protein and codon multiple sequence alignments +of integrated orthologous gene family fasta files produced by the GeneFamilyIntegrator tool. ----- **Required options** - * **Select type of data to sub sample** + * **Classified orthogroup fasta files** - orthogroup fasta files produced by the GeneFamilyClassifier tool selected from your history. Depending on how the GeneFamilyClassifier tool was executed, these could either be proteins or proteins and their corresponding coding sequences. - - **Gene family clusters** - sequences classified into gene family clusters. - - **Gene family clusters with corresponding coding sequences** - sequences classified into gene family clusters including corresponding coding sequences. + - **Proteins orthogroup fasta files** - proteins fasta files. + - **Protein and coding sequences orthogroup fasta files** - proteins and their corresponding coding sequences fasta files. - **Construct orthogroup multiple codon alignments** - construct orthogroup multiple codon alignments. 
- * **Select method for multiple sequence alignments** + * **Multiple sequence alignment method** - method for estimating orthogroup multiple sequence alignments. PlantTribes estimates alignments using either MAFFT's L-INS-i algorithm or the divide and conquer approach implemented in the PASTA pipeline for large alignments. - - **MAFFT algorithm** - mafft algorithm. - - **Pasta algorithm** - pasta algorithm. + - **MAFFT** - MAFFT algorithm. + - **PASTA** - PASTA algorithm. - - **Maximum number of iterations that the PASTA algorithm will execute** - maximum number of iterations that the PASTA algorithm will execute. + - **PASTA iteration limit** - number of PASTA iterations. By default, PASTA performs 3 iterations. **Other options** - * **Remove gappy sequences in alignments** + * **Alignment post-processing configuration** - select 'Yes' to enable multiple sequence alignment post-processing configuration options. - - **Select process used for gap trimming** - either nucleotide based or using trimAl's ML heuristic trimming approach + - **Trimming method** - multiple sequence alignment trimming method. PlantTribes trims alignments using two automated approaches implemented in trimAl. Gap score based trimming removes alignment sites that do not achieve a user specified gap score. For example, a setting of 0.1 removes sites that have gaps in 90% or more of the sequences in the multiple sequence alignment. The automated heuristic trimming approach determines the best automated trimAl method to trim a given alignment as described in the trimAl tutorial `trimAl`_. + +.. _trimAl: http://trimal.cgenomics.org - **Nucleotide based** - - **Remove sites in alignments with gaps of** - - **Maximum number of iterations** - maximum number of iterations for iterative orthogroups realignment, trimming and fitering + - **Gap score** - 1.0 - (the fraction of sequences with gaps allowed in an alignment site). The score is restricted to the range 0.0 - 1.0. Zero value has no effect. 
+ + - **Remove sequences** - select 'Yes' to remove sequences in multiple sequence alignments that do not achieve a user specified alignment coverage score. For example, a setting of 0.7 removes sequences with more than 30% gaps in the alignment. This option requires one of the trimming methods to be set. + + - **Coverage score** - minimum fraction of sites without gaps for a sequence in a multiple sequence alignment. The score is restricted to the range 0.0 - 1.0. Zero value has no effect. + + - **Realignment iteration limit** - number of iterations to perform trimming, removal of sequences, and realignment of orthogroup sequences. Zero value has no effect. - + + @article{Wall2008, + journal = {Nucleic Acids Research}, + author = {2. Wall PK, Leebens-Mack J, Muller KF, Field D, Altman NS}, + title = {PlantTribes: a gene and gene family resource for comparative genomics in plants}, + year = {2008}, + volume = {36}, + number = {suppl 1}, + pages = {D970-D976},} + + + @article{Katoh2013, + journal = {Molecular Biology and Evolution}, + author = {3. Katoh K, Standley DM}, + title = {MAFFT multiple sequence alignment software version 7: improvements in performance and usability}, + year = {2013}, + volume = {30}, + number = {4}, + pages = {772-780},} + + + @article{Mirarab2014, + journal = {Research in Computational Molecular Biology (RECOMB)}, + author = {4. Mirarab S, Nguyen N, Warnow T}, + title = {PASTA: Ultra-Large Multiple Sequence Alignment. In R. Sharan (Ed.)}, + year = {2014}, + pages = {177–191}, + url = {https://github.com/smirarab/pasta},} + + + @article{Capella-Gutierrez2009, + journal = {Bioinformatics}, + author = {5. 
Capella-Gutierrez S, Silla-Martínez JM, Gabaldón T}, + title = {trimAl: a tool for automated alignment trimming in large-scale phylogenetic analyses}, + year = {2009}, + volume = {25}, + number = {15}, + pages = {1972-1973},} + diff -r 5a5f80ea6306 -r 4a0837f2b995 macros.xml --- a/macros.xml Tue Apr 11 14:22:26 2017 -0400 +++ b/macros.xml Fri Apr 28 09:20:56 2017 -0400 @@ -3,7 +3,7 @@ 0.8 - plant_tribes_assembly_post_processor + plant_tribes_assembly_post_processor @@ -59,13 +59,13 @@ - + - + @@ -78,9 +78,9 @@ - - - + + + @@ -90,31 +90,31 @@ - + - - - + + + - + - + - - + + @@ -130,34 +130,4 @@ url = {https://github.com/dePamphilis/PlantTribes},} - - - @article{Sasidharan2012, - journal = {Nucleic Acids Research}, - author = {2. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A}, - title = {GFam: a platform for automatic annotation of gene families}, - year = {2012}, - pages = {gks631},} - - - @article{Li2003, - journal = {Genome Research} - author = {3. Li L, Stoeckert CJ, Roos DS}, - title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes}, - year = {2003}, - volume = {13}, - number = {9}, - pages = {2178-2189},} - - - @article{Emms2015, - journal = {Genome Biology} - author = {4. Emms DM, Kelly S}, - title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy}, - year = {2015}, - volume = {16}, - number = {1}, - pages = {157},} - -