changeset 0:ef6cd36613a8 draft

Uploaded
author greg
date Wed, 01 Mar 2017 10:29:54 -0500
parents
children 431e4f3487a7
files kaks_analysis.xml plant_tribes_scaffolds.loc.sample test-data/species1.faa test-data/species1.fna test-data/species2.faa test-data/species2.fna tool_data_table_conf.xml.sample
diffstat 7 files changed, 214 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kaks_analysis.xml	Wed Mar 01 10:29:54 2017 -0500
@@ -0,0 +1,156 @@
+<tool id="plant_tribes_kaks_analysis" name="KaKsAnalysis" version="0.7.0">
+    <description>estimates paralogous and orthologous pairwise synonymous (Ks) and non-synonymous (Ka) substitution rates</description>
+    <requirements>
+        <!-- Package that the tool depends on at runtime. -->
+        <!-- NOTE(review): the package version is 0.7 while the tool version is 0.7.0; confirm this is intended. -->
+        <requirement type="package" version="0.7">plant_tribes_kaks_analysis</requirement>
+    </requirements>
+    <stdio>
+        <!-- Any exit status other than zero, negative or positive, marks the job as failed. -->
+        <exit_code range=":-1" level="fatal" />
+        <exit_code range="1:" level="fatal" />
+        <!-- The exit status may not have been set properly, so the job output is also scanned for error markers. -->
+        <regex match="Error:" source="both" level="fatal" />
+        <regex match="Exception:" source="both" level="fatal" />
+    </stdio>
+    <command>
+        <![CDATA[
+            ## Builds the KaKsAnalysis command line.
+            ## Parameters nested in a conditional are always referenced through that
+            ## conditional (e.g. $comparison_cond.comparison); a bare name cannot be resolved.
+            ## Template comments use "##": a line starting with a single "#" that is not a
+            ## directive is emitted into the command, where the shell would treat the rest
+            ## of the joined command line as a comment and silently drop later flags.
+            ##
+            ## $scaffold is expected to be a select parameter backed by the
+            ## plant_tribes_scaffolds data table, which provides the "path" field.
+            KaKsAnalysis
+            --config_dir '$scaffold.fields.path'
+            --num_threads \${GALAXY_SLOTS:-4}
+            --coding_sequences_species_1 '$coding_sequences_species_1'
+            --proteins_species_1 '$proteins_species_1'
+            --comparison $comparison_cond.comparison
+            #if str($comparison_cond.comparison) == 'orthologs':
+                ## The second species is only available for cross species comparison.
+                --coding_sequences_species_2 '$comparison_cond.coding_sequences_species_2'
+                --proteins_species_2 '$comparison_cond.proteins_species_2'
+            #end if
+            #if str($options_type.options_type_selector) == 'advanced':
+                --min_coverage $options_type.min_coverage
+                --recalibration_rate $options_type.recalibration_rate
+                #if str($options_type.codeml_ctl_file_cond.codeml_ctl_file_select) == 'yes':
+                    --codeml_ctl_file '$options_type.codeml_ctl_file_cond.codeml_ctl_file'
+                    ## No else block needed here because the default codeml_ctl config
+                    ## will be used if the --codeml_ctl_file flag is missing.
+                #end if
+                #if str($options_type.fit_components_cond.fit_components) == 'yes':
+                    --num_of_components $options_type.fit_components_cond.num_of_components
+                    --min_ks $options_type.fit_components_cond.min_ks
+                    --max_ks $options_type.fit_components_cond.max_ks
+                #end if
+            #end if
+        ]]>
+    </command>
+    <inputs>
+        <!-- PlantTribes scaffold; the "path" column of the selected data table entry is handed to the command as its config_dir. -->
+        <param name="scaffold" type="select" label="Orthogroups or gene families proteins scaffold">
+            <options from_data_table="plant_tribes_scaffolds" />
+            <validator type="no_options" message="No PlantTribes scaffolds are available" />
+        </param>
+        <!-- Sequences of the first (or only) species. -->
+        <param name="coding_sequences_species_1" format="fasta" type="data" label="Coding sequences (CDS) fasta file for the species" />
+        <param name="proteins_species_1" format="fasta" type="data" label="Amino acid sequences (FAA) fasta file for the species" />
+        <!-- Comparison mode; the second species inputs only exist for the cross species (orthologs) case. -->
+        <conditional name="comparison_cond">
+            <param name="comparison" type="select" label="Select method for pairwise sequence comparison to determine homologous pairs" help="Cross species comparison requires selection of inputs for second species">
+                <option value="paralogs" selected="true">Self species comparison</option>
+                <option value="orthologs">Cross species comparison</option>
+            </param>
+            <when value="paralogs" />
+            <when value="orthologs">
+                <param name="coding_sequences_species_2" format="fasta" type="data" label="Coding sequences (CDS) fasta file for the second species" />
+                <param name="proteins_species_2" format="fasta" type="data" label="Amino acid sequences (FAA) fasta file for the second species" />
+            </when>
+        </conditional>
+        <!-- Basic runs pass none of the flags below; Advanced exposes them. -->
+        <conditional name="options_type">
+            <param name="options_type_selector" type="select" label="Options Configuration">
+                <option value="basic" selected="true">Basic</option>
+                <option value="advanced">Advanced</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+                <param name="min_coverage" type="float" value="0.5" min="0.3" max="1.0" label="Minimum sequence pairwise coverage length between homologous pairs" />
+                <param name="recalibration_rate" type="float" value="0.0" min="0.0" label="Predetermined evolutionary rate for recalibrating synonymous substitutions (ks) of species" />
+                <!-- Optional user supplied codeml control file; with "No" the flag is omitted from the command. -->
+                <conditional name="codeml_ctl_file_cond">
+                    <param name="codeml_ctl_file_select" type="select" label="Select PAML codeml control file?" help="Used for ML analysis of protein-coding DNA sequences using codon substitution models, select No to use the default control file">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no" />
+                    <when value="yes">
+                        <param name="codeml_ctl_file" format="txt" type="data" label="PAML codeml control file" />
+                    </when>
+                </conditional>
+                <!-- Optional mixture model fitting; the three settings below are only passed when "Yes" is selected. -->
+                <conditional name="fit_components_cond">
+                    <param name="fit_components" type="select" label="Fit a mixture model of multivariate normal components to synonymous (ks) distribution?" help="Used to identify significant duplication events in a genome">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no" />
+                    <when value="yes">
+                        <param name="num_of_components" type="integer" value="0" min="0" label="Number of components to fit to synonymous substitutions (ks) distribution" />
+                        <param name="min_ks" type="float" value="0.0" min="0.0" label="Lower limit of synonymous substitutions (ks)" help="Reduces background noise from young paralogous pairs due to normal gene births and deaths in a genome" />
+                        <param name="max_ks" type="float" value="0.0" min="0.0" label="Upper limit of synonymous substitutions (ks)" help="Excludes likely ancient paralogous pairs" />
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- Single list collection: every file discovered in kaksAnalysis_dir becomes one hidden element, identified by its file name. -->
+        <!-- NOTE(review): all discovered files are typed as fasta; confirm this matches what the tool writes into that directory. -->
+        <collection name="ks_files" type="list">
+            <discover_datasets pattern="__name__" directory="kaksAnalysis_dir" visible="false" ext="fasta" />
+        </collection>
+    </outputs>
+    <tests>
+        <!-- TODO: placeholder only. This test sets no parameters and asserts no outputs, so it verifies nothing. -->
+        <!-- The species1/species2 fasta files under test-data are presumably intended as its inputs; confirm and fill in. -->
+        <test>
+        </test>
+    </tests>
+    <help>
+This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of
+complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies.  It performs orthologous or
+paralogous ks analyses of coding sequences and amino acid sequences.
+
+-----
+
+**Options**
+
+ * **Required options**
+
+  - **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data.
+  - **Coding sequences (CDS) fasta file for the species** - Coding sequences of the species in fasta format.
+  - **Amino acid sequences fasta file for the species** - Amino acid sequences of the species in fasta format.
+  - **Select method for pairwise sequence comparison to determine homologous pairs** - Either 'Self species comparison' (paralogs) or 'Cross species comparison' (orthologs).  Cross species comparison requires selection of the coding sequences and amino acid sequences fasta files for the second species.
+
+ * **Advanced options**
+
+  - **Minimum sequence pairwise coverage length between homologous pairs** - A value between 0.3 and 1.0 (the default is 0.5).
+  - **Predetermined evolutionary rate for recalibrating synonymous substitutions (ks) of species** - A value of 0.0 or greater (the default is 0.0).
+  - **Select PAML codeml control file?** - Used for ML analysis of protein-coding DNA sequences using codon substitution models.  Select 'No' to use the default control file, or 'Yes' to select a PAML codeml control file from your history.
+
+ * **Mixture model options**
+
+  - **Fit a mixture model of multivariate normal components to synonymous (ks) distribution?** - Used to identify significant duplication events in a genome.  Selecting 'Yes' enables the following options.
+  - **Number of components to fit to synonymous substitutions (ks) distribution** - Number of components to fit to the ks distribution.
+  - **Lower limit of synonymous substitutions (ks)** - Reduces background noise from young paralogous pairs due to normal gene births and deaths in a genome.
+  - **Upper limit of synonymous substitutions (ks)** - Excludes likely ancient paralogous pairs.
+
+    </help>
+    <citations>
+        <!-- TODO: the bibtex entry below still carries placeholder values: its key, title and year are all "None". -->
+        <citation type="bibtex">
+            @unpublished{None,
+            author = {Eric Wafula},
+            title = {None},
+            year = {None},
+            url = {https://github.com/dePamphilis/PlantTribes}
+            }
+        </citation>
+        <!-- Published works cited by DOI. -->
+        <citation type="doi">10.1186/1471-2105-10-421</citation>
+        <citation type="doi">10.1093/molbev/msm088</citation>
+        <citation type="doi">10.18637/jss.v004.i02</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plant_tribes_scaffolds.loc.sample	Wed Mar 01 10:29:54 2017 -0500
@@ -0,0 +1,4 @@
+## Plant Tribes scaffolds
+#Value	Name	Path	Description
+#22Gv1.0	22Gv1.0	/plant_tribes/scaffolds/22Gv1.0	22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0)
+#22Gv1.1	22Gv1.1	/plant_tribes/scaffolds/22Gv1.1	22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/species1.faa	Wed Mar 01 10:29:54 2017 -0500
@@ -0,0 +1,16 @@
+>species1_11
+MGVSMGQGNPMGMHLLPSGSSSPRTSPSLRDPPLSLPVLPNSDLSVSLPDLHKLRRNEPVTSGILHVRDLSFLRPRSHNGDDDEETEEMTREQEEKYLQWRSSLVEKLAGIELNLERVKFRMSVEIPPSDDFRAMKKSWENFYASELLSSRNPVRKIAKRPDTILVRGVPSRWFAETRISSKASTLVTHTIIESC
+>species1_12
+MSAAAAALRPTEPLPLPSGLSLAPRLKLLLTFFRADLSVRPVDEWQLKTALLAFLRDPPLSLPVLPDSDLSVRTLPDLHKRRRDEPVASGVLHVRDLSFLRPRRRNGDDEEEEAEEMTREQEEEKYFQWRSSLVEKLAGIELNLEGVKFRMSVEIPPSDDFRAMKKSWENFYASELLSSRNPVRKIAKRPDTILVRGVPSRWFAETRISSKASTLVTHTIFSALGKIRNLNISSDDEWGAKQDGTNKEIISGLNCKVWVQFENYDDFNSAMQALCGRSLEKEGSRLKVDYEVTWDHEGFFRNAQYEPVRSNLEERNSSAHGRKKHYTSRIESDHRKRFRD
+>species1_15
+MKDGLSLSFALISSSPDSKCELLNSRPSCRAARRGESGLLIRRSYLRPCQCPFGDRMSEQQDSTSKSSSSSISSSTQESEEEVSITIGSLLAQAKNNSGHSLGRRLSQLGSIPHTPRVNGKIPNLDNATLDHERLSERLGNYGLAEFQIEGDGNCQFRALADQIFRNPDYHKHVRKLVMKQLKEFRKQYESYVPMEYKVYLKKMKRSGEWGDHLTLQAAADRFGAKICLLTSFRDTCLIEIVPRDVTPTRELWLSFWCEVHYNSLYATDDLLTRKTKKKHWLF
+>species1_16
+MSEQQDHASKSSCSSLSTSTQESEEDVTVGTLLTEAKNSGRSLGKRLSHLDSIPHTPRVNGQIPDVNNATIDHETLLERLGTYGLAEFQIEGDGNCQFRALADQIFRNPDYHKHVRKSVVKQLKEFRKHYEGYVPMEYKVYLKKMKRSGEWGDHVTLQAAADRFAAKICLLTSFRDTCLIEIVPRGATPTKELWLSFWSEVHYNSLYATEDLPNRKTRKKHWLF
+>species1_21
+MAGAGAGESLDLPVVDLASSDLAAAAKSVRKACVEYGFFYVVNHGAEGLAEKVFGESSKFFEQPLGEKMALLRNRNYLGYTPLGADKLDASSKFKGDLNENYCIGPIRKEGYQNDANQWPSEENFPCWKETMKLYHETALATGKRILSLIALSLNLDVEFFDCPVAFLRLLHYPGEANESDDGNYGASAHSDYGVLTLVATDGTPGLQICREKDRCPQLWEDVHHIEGALIVNIGDLLQRWTNCVFRSTLHRVVAVGKERYSVAFFLHTNPDLVVQCLESCCSEACPPRFPPIRSGDYLEDRLRARYK
+>species1_22
+MWGPHIILYLQPFFLLPSSHMSCVLGRPSAPSLDHPQQPNPPPVAPEKPPAVAKKAAEEEEEKKPPKQARRERHAWSSRSAAAEAVGLGLGGSFANRARGEQVAAGWPAWLSAVVGEAIDGWTLRRADSFEKIDKVRTPALALAIVGGGGRELSSSVLSVAQIGQGTYINVYKARDTVTGKIVALKKMGQVCFLLCKPSYRGDTAAGGRGGRRRQQQQTAALAEEESGMAGGGGGGNRLDLPVVDLASSDPRAAAESIRKACVESGFFYVVNHGVEEGLLKRLFAESSKFFELPMEEKIALRRNSNHRGYTPPYAEKLDPSSKFEGDLKESFYIGPIGDEGLQNDANQWPSEERLPSRRETIKMYHASALSTGKRILSLIALSLNLDAEFFENIGAFSCPSAFLRLLHYPGEVDDSDDGNYGASAHSDYGMITLLATDGTPGLQICREKNRNPQLWEDVHHIDGALIVNIGDLLERWTNCIYRSTVHRVVAVGKERYSAAFFLDPNPDLVVQCLESCCSESCPPRFSPIKSGDYLKERLSATYK
+>species1_35
+MAAATTSRRGPGAMDDENLTFETSPGVEVISSFDQMGIRDDLLRGIYAYGFEKPSAIQQRAVLPIISGRDVIAQAQSGTGKTSMISLSVCQIVDTAVREVQALILSPTRELAAQTERVMLAIGDFINIQVHACIGGKSIGEDIRKLEHGVHVVSGTPGRVCDMIKRRTLRTRAIKLLILDEADEMLGRGFKDQIYDVYRYLPPELQVCLISATLPHEILEMTSKFMTDPVRILVKRDELTLEGIKQFFVAVEKEEWKFDTLCDLYDTLTITQAVIFCNTKRKVDWLTERMRSNNFTVSAMHGDMPQKERDAIMGEFRSGATRVLITTDVWARGLDVQQVSLVINYDLPNNRELYIHRIGRSGRFGRKGVAINFVKKEDIRILRDIEQYYSTQIDEMPMNVADLI
+>species1_36
+MAAATTSRRGPGAMDDENLTFETSPGVEVISSFDQMGIREDLLRGIYAYGFEKPSAIQQRAVLPIISGRDVIAQAQSGTGKTSMISLSVCQIVDTAVREVQALILSPTRELAAQTERVMLAIGDYINIQVHACIGGKSIGEDIRKLEHGVHVVSGTPGRVCDMIKRRTLRTRAIKLLILDEADEMLGRGFKDQIYDVYRYLPPELQVCLISATLPHEILEMTSKFMTDPVRILVKRDELTLEGIKQFFVAVEKEEWKFDTLCDLYDTLTITQAVIFCNTKRKVDWLTERMRSNNFTVSAMHGDMPQKERDAIMGEFRSGATRVLITTDVWARGLDVQQVSLVINYDLPNNRELYIHRIGRSGRFGRKGVAINFVKKEDIRILRDIEQYYSTQIDEMPMNVADLI
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/species1.fna	Wed Mar 01 10:29:54 2017 -0500
@@ -0,0 +1,16 @@
+>species1_11
+ATGGGTGTGAGTATGGGGCAAGGAAACCCAATGGGTATGCACTTGTTGCCATCTGGCAGCTCAAGTCCGCGCACCTCGCCTTCCCTCCGCGACCCGCCCCTCTCCCTCCCCGTCCTTCCCAACTCCGACCTCTCCGTGTCCCTCCCCGACCTGCATAAGCTTCGCCGCAATGAGCCCGTCACTTCGGGCATCCTCCACGTCCGCGACCTCTCATTCCTCCGCCCCCGCAGCCACAACGGGGATGATGATGAGGAGACCGAGGAGATGACCCGTGAGCAGGAGGAGAAGTACTTGCAGTGGAGGAGCTCCCTGGTCGAGAAGCTGGCCGGGATCGAGCTCAACCTCGAGAGGGTTAAGTTTCGGATGAGCGTCGAAATCCCGCCCTCCGATGACTTCAGGGCAATGAAGAAGTCTTGGGAGAATTTCTACGCCTCCGAGCTCCTCAGTAGCAGGAATCCTGTGAGGAAGATAGCGAAAAGGCCAGACACAATTCTTGTCCGTGGTGTGCCATCCAGGTGGTTTGCGGAGACGAGGATATCATCGAAAGCCTCCACACTGGTCACACACACTATCATCGAAAGCTGC
+>species1_12
+ATGTCCGCCGCCGCCGCCGCCCTCCGGCCGACCGAGCCGCTCCCCCTCCCGAGCGGCCTCTCCCTCGCGCCGCGCCTCAAGCTGCTCCTCACCTTCTTCCGCGCCGACCTCTCCGTCCGCCCCGTCGACGAGTGGCAGCTCAAGACCGCGCTCCTCGCCTTCCTCCGCGACCCGCCCCTCTCCCTCCCCGTCCTCCCCGACTCCGACCTCTCCGTGCGCACCCTCCCCGACCTGCATAAGCGCCGCCGCGACGAGCCCGTCGCCTCGGGCGTCCTCCACGTCCGCGACCTCTCCTTCCTCCGCCCACGCCGCCGCAACGGGGATGATGAGGAGGAGGAGGCCGAGGAGATGACCCGTGAGCAGGAGGAGGAGAAGTACTTCCAGTGGAGGAGCTCCCTGGTCGAGAAGCTGGCCGGGATCGAGCTCAACCTCGAGGGGGTTAAGTTTCGGATGAGCGTCGAGATCCCGCCCTCCGATGACTTCAGGGCAATGAAGAAGTCTTGGGAGAATTTCTACGCCTCCGAGCTCCTCAGTAGCAGGAATCCTGTGAGGAAGATAGCGAAAAGGCCAGACACCATTCTTGTCCGGGGTGTGCCATCCAGGTGGTTTGCGGAGACGAGGATATCATCGAAAGCCTCCACGCTGGTCACACACACTATTTTCTCGGCACTTGGTAAAATAAGGAACCTTAATATTTCTAGTGATGATGAATGGGGAGCAAAACAAGACGGAACCAATAAGGAGATTATATCTGGACTAAATTGCAAAGTGTGGGTGCAATTTGAGAACTACGACGATTTCAACAGTGCAATGCAGGCATTATGTGGACGTTCATTAGAAAAAGAAGGATCACGGTTGAAGGTAGACTATGAAGTAACTTGGGATCATGAAGGTTTCTTCCGCAATGCACAATACGAGCCTGTTCGCAGCAATTTAGAAGAGAGAAATTCATCGGCTCATGGAAGGAAGAAACATTACACATCGCGAATTGAGTCAGATCATAGAAAGAGATTTAGGGAT
+>species1_15
+ATGAAAGATGGCCTTTCTCTCTCCTTCGCTCTCATCAGCTCGAGCCCCGACAGCAAGTGTGAGCTACTGAACTCGAGACCCTCCTGTCGCGCGGCGCGGCGCGGCGAGAGTGGCCTTTTGATCCGACGAAGCTATCTAAGACCCTGCCAATGTCCATTTGGAGATAGGATGTCGGAACAGCAGGATAGTACTAGTAAAAGCTCTAGCTCAAGCATCAGCAGCAGTACACAGGAGAGCGAGGAGGAGGTATCTATAACAATAGGTAGCCTCCTCGCCCAAGCAAAGAACAACAGTGGGCATAGTCTTGGAAGGCGCCTCTCTCAATTGGGTTCAATCCCGCACACTCCTCGAGTTAATGGAAAAATCCCTAATCTTGATAATGCAACTTTGGATCATGAAAGATTGTCGGAAAGGTTGGGAAATTATGGTTTGGCCGAGTTTCAAATAGAGGGTGATGGAAATTGTCAGTTCCGAGCTTTGGCAGACCAGATATTTCGCAACCCCGATTATCACAAACATGTGAGAAAGTTAGTCATGAAACAGCTAAAGGAATTCAGAAAACAGTATGAAAGCTATGTACCTATGGAATATAAAGTCTACTTGAAGAAAATGAAAAGATCTGGGGAATGGGGGGATCATCTGACTTTACAAGCAGCTGCAGACAGGTTTGGTGCCAAAATTTGTTTGCTGACGTCATTCAGAGACACCTGCCTAATTGAGATAGTCCCCAGGGATGTGACTCCCACAAGGGAGTTGTGGCTAAGCTTCTGGTGTGAAGTGCACTACAATTCCTTGTACGCAACTGACGATCTCCTAACCCGCAAAACCAAGAAGAAGCATTGGTTGTTC
+>species1_16
+ATGTCTGAACAACAGGATCATGCTAGCAAAAGTTCTTGCTCAAGTCTTAGCACCAGTACTCAGGAGAGTGAGGAGGATGTGACAGTTGGTACCCTTTTAACTGAAGCAAAGAACAGTGGACGGAGTCTTGGAAAACGCCTTTCCCACTTAGATTCTATCCCGCACACTCCTCGAGTTAATGGGCAAATTCCTGATGTTAATAATGCAACAATAGACCATGAAACATTACTGGAAAGATTGGGCACTTATGGCTTAGCTGAATTCCAAATTGAAGGAGACGGAAATTGTCAGTTCCGAGCTTTGGCAGATCAGATATTCCGCAATCCTGACTATCACAAACATGTGAGGAAGTCAGTCGTGAAGCAGCTAAAGGAATTCAGGAAACACTATGAAGGCTATGTACCGATGGAATATAAGGTGTACTTGAAGAAAATGAAAAGATCTGGAGAATGGGGAGATCATGTGACCTTACAAGCGGCTGCAGACCGGTTTGCTGCCAAGATTTGCCTGCTGACATCATTTAGAGACACATGCCTAATCGAGATAGTCCCCAGAGGTGCCACTCCCACAAAAGAGCTTTGGTTAAGCTTCTGGAGTGAGGTGCACTACAATTCCTTGTATGCAACTGAAGATCTTCCAAATCGCAAGACCAGAAAGAAGCACTGGCTGTTC
+>species1_21
+ATGGCCGGCGCCGGCGCCGGCGAGAGCCTGGACCTCCCCGTGGTGGACCTAGCGTCCTCCGACCTCGCCGCCGCCGCCAAATCCGTCCGAAAGGCTTGCGTGGAGTACGGATTCTTCTACGTGGTCAACCATGGAGCCGAGGGATTGGCGGAGAAGGTGTTCGGGGAGAGCAGCAAGTTTTTCGAGCAGCCGCTGGGGGAGAAGATGGCGCTGCTGAGGAACAGAAACTACCTGGGGTACACCCCGCTTGGCGCCGATAAGCTCGACGCCTCGTCCAAATTCAAAGGAGATCTCAATGAAAATTACTGTATCGGACCTATCAGAAAAGAAGGTTATCAGAATGATGCTAACCAATGGCCTTCTGAAGAGAATTTCCCATGTTGGAAGGAGACAATGAAGCTATACCATGAAACTGCACTTGCTACTGGTAAAAGGATACTCTCTCTAATTGCTCTGAGTTTGAATCTCGACGTTGAATTCTTTGACTGCCCAGTGGCCTTTCTTCGGTTATTGCACTACCCAGGTGAAGCTAACGAGTCCGATGATGGCAATTATGGTGCATCAGCTCACTCAGACTATGGAGTACTAACACTTGTAGCAACAGATGGCACTCCTGGGCTGCAGATATGCAGGGAGAAGGATAGGTGCCCCCAGCTTTGGGAAGACGTTCATCACATTGAAGGGGCCCTGATTGTTAATATCGGCGATTTGCTACAAAGGTGGACTAATTGTGTTTTCAGGTCTACACTGCATCGCGTTGTTGCAGTTGGTAAAGAGCGATACTCTGTGGCTTTCTTTCTTCACACAAACCCTGATTTAGTGGTTCAATGCTTGGAAAGCTGCTGCAGTGAGGCATGCCCACCGAGGTTCCCACCTATAAGGAGCGGCGACTATTTGGAAGACCGATTGAGGGCTAGATACAAA
+>species1_22
+ATGTGGGGCCCACATATCATCCTCTATCTCCAACCCTTCTTCCTCCTCCCTTCCTCTCACATGAGCTGCGTCCTCGGCCGCCCCTCCGCCCCCTCCCTCGACCACCCCCAGCAGCCCAACCCCCCGCCCGTCGCCCCGGAGAAGCCGCCCGCCGTCGCCAAGAAGGCGGCCGAGGAGGAGGAGGAGAAGAAGCCGCCGAAGCAGGCTAGGAGGGAGAGGCACGCATGGTCGTCGCGGTCTGCCGCCGCCGAGGCGGTCGGCCTGGGGCTCGGGGGGAGCTTCGCCAACAGGGCGCGCGGGGAGCAGGTGGCGGCCGGCTGGCCCGCCTGGCTCTCCGCCGTCGTCGGCGAGGCCATCGACGGCTGGACCCTGCGCCGCGCCGACTCCTTCGAGAAGATCGACAAGGTACGTACTCCTGCCCTCGCGCTCGCCATTGTTGGTGGTGGGGGAAGGGAACTGAGCTCATCGGTCTTGTCGGTGGCGCAGATCGGGCAGGGGACGTACATCAACGTGTACAAGGCGCGGGACACGGTGACGGGCAAGATCGTGGCGCTCAAGAAGATGGGCCAAGTTTGCTTCCTTCTCTGTAAGCCCAGTTACCGTGGGGATACAGCCGCCGGCGGACGCGGAGGGCGGCGGCGGCAGCAGCAGCAAACCGCCGCTTTGGCAGAAGAGGAATCCGGGATGGCCGGCGGCGGCGGCGGCGGGAATCGCCTGGACCTCCCCGTGGTGGACCTCGCGTCCTCCGACCCCCGAGCCGCCGCCGAGTCCATCCGAAAGGCGTGCGTGGAGTCCGGATTCTTCTACGTGGTCAACCATGGGGTGGAGGAGGGATTGCTGAAGAGGTTGTTCGCGGAGAGCTCGAAGTTCTTCGAGCTGCCGATGGAGGAGAAGATAGCGCTGCGGAGGAACAGCAACCACCGGGGATACACCCCGCCCTACGCCGAGAAGCTCGATCCCTCGTCCAAATTCGAAGGAGACCTCAAGGAAAGTTTCTATATTGGGCCTATTGGAGATGAAGGTTTGCAGAATGATGCTAACCAGTGGCCTTCTGAAGAGCGCTTACCAAGTCGGAGGGAGACAATTAAGATGTACCATGCAAGTGCACTGTCTACTGGCAAAAGGATACTCTCTCTAATCGCTCTGAGTTTGAATCTTGACGCTGAATTCTTTGAGAACATTGGTGCCTTCAGCTGCCCATCAGCATTTCTTCGATTATTGCACTACCCAGGTGAAGTAGACGACTCTGATGATGGCAATTATGGTGCATCAGCTCACTCTGATTATGGAATGATAACCCTCCTAGCAACAGACGGCACTCCTGGGCTACAGATATGCAGGGAAAAGAATAGGAATCCCCAGCTCTGGGAAGATGTTCATCACATTGATGGGGCCCTGATTGTTAACATTGGCGATTTGCTAGAAAGGTGGACGAATTGTATTTACAGGTCTACAGTGCACCGTGTTGTTGCAGTTGGTAAAGAGCGATATTCTGCGGCTTTTTTTCTTGACCCAAACCCTGATTTAGTGGTTCAGTGTTTGGAAAGCTGTTGCAGCGAGTCATGCCCACCGAGGTTCTCACCTATAAAGAGTGGCGACTATTTGAAAGAGCGATTGAGCGCTACATACAAA
+>species1_35
+ATGGCGGCGGCCACCACGTCGCGGCGCGGCCCGGGCGCCATGGACGACGAGAACCTCACCTTCGAGACCTCCCCGGGGGTCGAGGTCATCAGCAGCTTCGACCAGATGGGGATCCGCGACGACCTCCTCCGCGGCATCTACGCCTACGGCTTCGAGAAGCCCTCCGCCATCCAGCAGCGCGCCGTCCTCCCCATCATCAGCGGCCGCGACGTCATCGCCCAGGCCCAGTCCGGGACCGGCAAGACCTCCATGATCTCGCTCTCCGTCTGCCAGATCGTAGACACCGCCGTCCGTGAGGTGCAGGCTTTAATACTGTCACCAACTAGAGAACTTGCTGCACAAACAGAAAGAGTTATGCTGGCTATCGGTGACTTCATCAATATCCAAGTGCATGCTTGTATTGGTGGCAAAAGTATTGGTGAGGATATTAGAAAGCTTGAGCACGGAGTGCATGTGGTGTCAGGAACACCTGGCAGAGTCTGTGATATGATCAAGAGAAGGACCTTGCGTACAAGAGCCATTAAGCTCCTAATTCTGGATGAAGCTGATGAGATGTTGGGCAGAGGCTTTAAGGATCAGATATATGATGTGTACAGATACCTCCCTCCAGAACTCCAGGTTTGCTTGATCTCCGCAACTCTGCCTCACGAGATCTTGGAAATGACCAGCAAGTTCATGACTGATCCAGTTCGGATCCTTGTGAAGCGTGATGAATTGACTCTAGAGGGCATCAAACAATTCTTTGTTGCTGTTGAGAAAGAAGAATGGAAGTTTGACACGCTTTGTGATCTTTATGATACACTGACAATCACCCAAGCTGTCATTTTCTGCAACACAAAGAGAAAGGTTGATTGGCTTACGGAAAGAATGCGCAGCAATAACTTCACAGTATCAGCTATGCATGGCGACATGCCTCAAAAGGAAAGGGATGCCATTATGGGTGAATTCAGGTCTGGTGCAACCCGTGTTCTAATCACGACAGATGTGTGGGCTCGAGGCCTCGATGTTCAGCAGGTCTCTCTTGTCATAAATTATGATCTCCCAAATAATCGTGAACTTTACATCCATCGCATTGGTCGCTCTGGACGTTTTGGTCGCAAGGGTGTGGCCATCAATTTTGTCAAAAAGGAAGACATCCGTATCCTGAGAGATATCGAGCAGTACTACAGCACGCAGATTGATGAAATGCCAATGAATGTTGCTGATCTAATT
+>species1_36
+ATGGCGGCGGCCACCACGTCCCGGCGCGGCCCCGGCGCCATGGACGACGAGAACCTCACCTTCGAGACCTCCCCCGGGGTCGAGGTCATCAGCAGCTTCGACCAGATGGGGATCCGCGAGGACCTCCTCCGCGGCATCTACGCCTACGGCTTCGAGAAGCCCTCCGCCATCCAGCAGCGCGCCGTCCTCCCCATCATCAGCGGCCGCGACGTCATCGCCCAGGCCCAGTCCGGAACCGGCAAGACCTCCATGATCTCGCTCTCCGTCTGCCAGATCGTCGACACCGCCGTCCGAGAGGTTCAGGCCTTGATACTCTCACCAACTAGAGAACTTGCTGCACAAACAGAAAGAGTTATGCTGGCCATTGGTGATTACATCAATATCCAAGTGCATGCTTGTATTGGTGGCAAAAGTATTGGTGAGGATATTAGAAAGCTTGAGCATGGAGTGCATGTTGTGTCAGGAACACCTGGCAGAGTCTGTGATATGATCAAGAGAAGGACCTTGCGTACAAGAGCCATTAAGCTCCTAATTCTGGATGAAGCCGATGAGATGTTGGGCAGAGGCTTTAAGGATCAGATATATGATGTCTACAGATATCTACCCCCAGAGCTCCAGGTTTGCTTGATCTCCGCAACTCTGCCACATGAGATCTTGGAAATGACCAGCAAGTTCATGACTGACCCAGTCCGGATCCTTGTAAAGCGTGATGAATTGACCCTAGAGGGCATCAAACAATTCTTTGTTGCTGTTGAGAAAGAAGAATGGAAGTTTGATACTCTTTGTGATCTTTATGATACACTGACAATCACCCAAGCTGTCATTTTCTGCAACACGAAGAGAAAGGTTGATTGGCTTACAGAAAGAATGCGCAGCAATAACTTCACGGTATCAGCTATGCATGGTGACATGCCTCAAAAGGAAAGGGATGCCATTATGGGTGAATTCAGGTCTGGTGCAACCCGTGTTCTAATTACGACAGATGTGTGGGCTCGAGGCCTGGATGTTCAGCAGGTCTCTCTTGTCATAAACTATGATCTTCCAAATAATCGTGAACTTTACATCCATCGCATTGGTCGCTCTGGACGTTTTGGTCGCAAGGGTGTGGCCATCAATTTTGTCAAAAAGGAAGACATCCGTATCCTGAGAGATATTGAGCAGTACTACAGCACACAGATTGATGAAATGCCAATGAATGTTGCTGATCTAATT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/species2.faa	Wed Mar 01 10:29:54 2017 -0500
@@ -0,0 +1,8 @@
+>species2_5
+MESQSAVVPLIAELPEKRGGKTLVEEVWEESKKLWEVTGPAAFTGMVLYSMTIVSQAFAGHLGDRHLAAFSIANTVISGLNFGILLGMASALETLCGQAYGAKQYSMMGTYLQRSWLVLLAFAVLLAPTYIFSGQLLMVLGQPAELSREAGLLGMYLLPLHLMFAIQLPLNKFLQCQRKNWVIALSSVLGFPVHVVATWLLAQRFQLGVLGAAMSLNLSWALITGLQLAYAVGGGCPETWRGFSSSAFMGLKDFVSLSVASGVMTCLESWYYRLLIFLTAYAKNAELAVDALSICLSWAGWEMMIHFGFLAGTGVRVANELGANNGRAAKFATIVSTTTSFLICLLISSLALIFHDKLAILFTSSEAVIDAVDGISVLLALTILLNGIQPVLSGVAVGSGWQALVAYVNIGSYYIIGVPFGVLLAWGFHYGVLGIWVGMIGGTMVQTLILSFITLRCDWNEEALKASSRMRTWSSSK
+>species2_6
+MEENRSDIPLISGSELPDRRGGGKISELAKEVWGESKKLWVVAGPAAFTRLTFYGMTVVSQAFAGHIGDLELAAFSIATTVISGLSFGFFVGMASAMETLCGQAYGAKQYHMMGIYLQRSWLILLSFAVLLTPTYIFSEQLLTALGQPAELSRQAGLVSLYMLPLHFVYAIVLPLNKFLQCQRKNWVAAVTTAAAFPVHVVATWLLVRCFRLGVFGAAMALTLSWALATVGLLSYALGGGCPETWRGFSASAFVDLKDFIKLSAASGVMLCLENWYYRILVFLTGYVKNAELAVDALSICISYAGWEMMIHLGFLAGTGVRVANELGAANGARARFATIVSMTTSFLISLFISLLILIFHDKLGMIFSSSQAVIDAVDNISFLLALTILLNGIQPVLSGVAVGSGWQALVAYVNIGSYYLIGVPFGFLLGWGLHYGVQGIWVGMIVGTMVQTLILAYITLRCDWNEEALKASTRMRRWSNSK
+>species2_9
+MGTLGGHVAPGAFFFLIGLWHLFGHSRLFLLQRGSYVAPVWFPVPGVRHIELIMIIIGSVISVSMELVIVQPKHQPFDDDGTIPSVHLHNFEHASISLAWLVFAAATIHMDRVRAPMRDAVSQLAAAAAFAQQLLIFHFHSADHAGVQGRYHRLLEMVVAVTLAASLLLIPYQRSIALSLVRSASLVFQGVWFTVMGVMMWTPALVPKGCFMNDEDGLQVVRCRTDEALDRAKSLVNLQFNWYLTGTVAFVVVFYLQMAKQYQEQPQYAPLVKGGRGSDGRCTIGEVNDDEDDLEASKGGLGYIEIER
+>species2_10
+MGTLVGHVAPGAGFLLIGLWQLFSHIRLFLLRPSSYSAPVWFPAPGVRHLELILIIIGAAMSILMELVIGPAKHQPFDDDGTIPSDHLHNFEHASISLALLVFAAVTIHLDRVKAPLRDAVSQLVAAAAFAQQLLIFHLHSADHMGVEGQFHWLLQTVIAVTLATTLLGIPYPRSIVVSLVRSASLVLQGVWFVVMGVMLWTPALIPKGCFLNLEEGHDVVRCRTDEALDRAKSLVNLQFSWYLTGTVVFVVLFYLQMAKLYPEEPQYLPLVKGGGGGGDDRDSRFSIGDDDHDDEDDVEAAKRGFGHVVSGTKPVEIER
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/species2.fna	Wed Mar 01 10:29:54 2017 -0500
@@ -0,0 +1,8 @@
+>species2_5
+ATGGAGAGTCAGAGCGCCGTCGTCCCGCTCATTGCCGAGCTCCCGGAGAAGCGGGGAGGCAAAACCCTGGTGGAGGAGGTATGGGAGGAGTCCAAGAAGCTGTGGGAAGTCACCGGCCCGGCCGCCTTTACGGGGATGGTACTCTACAGCATGACCATCGTCAGCCAGGCCTTCGCCGGCCACCTTGGTGACCGCCATCTCGCCGCTTTCTCCATCGCCAACACCGTCATATCTGGCCTTAACTTTGGCATTTTGCTTGGCATGGCGAGTGCGCTGGAGACATTATGCGGCCAAGCCTACGGTGCAAAGCAGTACTCGATGATGGGCACCTATCTCCAGCGCTCATGGCTCGTCCTCCTCGCCTTCGCGGTGCTCCTTGCTCCGACGTACATCTTCAGCGGGCAGCTGCTCATGGTCCTGGGCCAGCCCGCCGAGCTGTCTCGCGAGGCGGGCTTGCTCGGCATGTACCTGCTCCCGCTGCACCTCATGTTTGCCATCCAGCTGCCGCTCAACAAGTTCTTGCAGTGCCAGCGCAAGAACTGGGTCATCGCGCTGTCCTCGGTGCTGGGTTTCCCGGTGCACGTCGTGGCGACCTGGCTGCTGGCGCAGCGCTTTCAGCTTGGCGTCCTGGGCGCAGCGATGTCACTCAACCTGTCCTGGGCGCTCATCACGGGCCTGCAGCTCGCGTACGCTGTTGGCGGTGGGTGCCCAGAGACGTGGAGAGGGTTCTCGTCGTCGGCATTCATGGGCTTGAAGGACTTCGTCAGCTTGTCCGTCGCGTCGGGAGTCATGACGTGCTTGGAGAGTTGGTACTACCGGTTATTGATTTTCCTAACGGCGTACGCGAAGAACGCAGAATTGGCTGTGGATGCACTGTCTATCTGCTTGAGTTGGGCTGGATGGGAGATGATGATTCATTTCGGGTTCTTAGCAGGCACTGGGGTGAGGGTTGCCAATGAGCTAGGCGCCAATAATGGACGAGCTGCAAAGTTTGCGACGATCGTGTCCACGACGACATCATTCCTGATCTGCCTCTTAATTAGTTCACTCGCACTCATTTTCCATGACAAACTCGCAATACTGTTCACGTCTAGTGAGGCTGTGATCGATGCAGTTGACGGTATTTCTGTTCTGCTAGCCCTCACCATCCTCCTCAATGGCATCCAACCTGTGCTATCCGGAGTTGCCGTTGGTTCAGGGTGGCAAGCGCTAGTTGCGTATGTGAACATTGGGAGCTACTACATTATCGGTGTTCCTTTCGGTGTTCTGCTAGCATGGGGTTTCCACTACGGGGTCCTTGGCATTTGGGTTGGAATGATCGGTGGCACGATGGTGCAAACTCTGATTCTTTCATTTATCACCTTACGATGCGACTGGAATGAAGAGGCACTGAAAGCTTCTAGCAGAATGCGGACATGGAGCAGCTCCAAG
+>species2_6
+ATGGAGGAGAATCGGAGCGATATCCCGCTCATCTCCGGCTCCGAGCTGCCGGACAGGAGGGGAGGAGGCAAGATCTCCGAGCTTGCGAAGGAGGTATGGGGAGAGTCCAAGAAGCTGTGGGTGGTCGCCGGCCCGGCCGCGTTCACGAGGCTGACATTCTATGGCATGACCGTGGTCAGCCAGGCCTTTGCCGGGCACATCGGTGACCTCGAGCTCGCCGCCTTCTCCATAGCCACCACCGTCATTTCTGGTCTCAGCTTTGGCTTCTTTGTTGGCATGGCGAGTGCAATGGAGACGCTGTGCGGCCAAGCCTACGGTGCAAAGCAGTACCACATGATGGGCATCTACCTGCAGCGCTCGTGGCTCATCCTCCTCAGCTTCGCCGTGCTTCTTACTCCGACCTACATCTTCAGCGAGCAGCTGCTCACCGCGCTGGGCCAGCCCGCCGAGCTGTCGCGCCAGGCGGGCTTGGTCAGCCTGTACATGCTCCCGCTGCACTTCGTCTACGCCATCGTCCTGCCGCTCAACAAGTTCCTGCAGTGCCAGCGCAAGAACTGGGTCGCCGCGGTCACCACGGCCGCGGCGTTCCCCGTTCACGTCGTCGCCACCTGGCTGCTGGTGCGTTGCTTCCGGCTCGGGGTCTTTGGAGCAGCGATGGCGCTCACCCTGTCCTGGGCACTCGCCACGGTGGGTCTCCTCTCGTATGCCTTGGGCGGCGGGTGCCCGGAGACGTGGAGGGGATTCTCAGCTTCTGCCTTCGTGGACTTGAAGGACTTCATCAAGTTGTCCGCGGCGTCTGGTGTCATGCTCTGCTTGGAGAATTGGTACTACCGGATCTTGGTTTTCCTGACGGGCTATGTGAAGAACGCTGAACTGGCTGTCGATGCACTGTCCATCTGTATAAGTTATGCTGGATGGGAGATGATGATTCATTTGGGATTCTTAGCAGGCACTGGGGTGAGGGTGGCTAATGAGCTCGGTGCAGCCAACGGAGCACGAGCGAGATTTGCGACAATTGTGTCGATGACGACATCATTTCTGATCAGCCTATTCATTAGTTTGCTCATCCTGATTTTCCATGACAAACTCGGAATGATCTTCTCGTCGAGTCAGGCTGTGATTGATGCAGTAGACAACATTTCCTTTCTGCTGGCCCTCACCATCCTCCTCAACGGAATCCAACCTGTGCTCTCTGGAGTTGCTGTTGGCTCAGGGTGGCAGGCATTGGTTGCTTATGTCAACATTGGGAGCTATTACTTGATTGGTGTTCCTTTCGGTTTTCTGCTAGGATGGGGCTTGCATTATGGGGTTCAAGGAATTTGGGTCGGAATGATCGTTGGCACAATGGTGCAAACTCTAATACTGGCATATATCACTCTACGGTGTGATTGGAATGAAGAGGCATTGAAAGCTAGTACCCGAATGCGGAGATGGAGCAACTCCAAG
+>species2_9
+ATGGGCACACTAGGCGGGCACGTCGCGCCGGGCGCCTTCTTCTTCCTCATCGGCCTGTGGCATCTGTTCGGCCACAGCCGCCTGTTCTTGCTACAGCGGGGCTCCTACGTGGCTCCGGTGTGGTTCCCGGTGCCGGGCGTCCGTCACATCGAGCTCATAATGATAATAATCGGCTCGGTGATCTCCGTCTCGATGGAGCTCGTCATCGTGCAGCCGAAGCACCAGCCGTTCGACGACGACGGCACCATCCCCAGCGTCCACCTGCACAACTTCGAGCACGCGTCCATCTCGCTGGCGTGGCTCGTCTTCGCCGCCGCCACCATCCACATGGACAGGGTCCGGGCGCCGATGCGGGACGCGGTGTCGCAGCTGGCGGCCGCGGCCGCGTTCGCGCAGCAGCTGCTCATCTTCCACTTCCACTCCGCGGACCACGCGGGCGTGCAGGGGCGGTACCACCGTCTGCTGGAGATGGTGGTCGCCGTCACGCTCGCCGCCTCGCTGCTCTTGATCCCCTACCAACGGAGCATCGCGCTGAGCCTGGTCCGCTCGGCCAGCCTCGTGTTCCAGGGCGTCTGGTTCACCGTCATGGGCGTCATGATGTGGACGCCGGCGCTCGTCCCCAAAGGCTGCTTCATGAACGACGAAGATGGCCTCCAAGTCGTCCGGTGCCGCACCGACGAGGCGCTCGACCGCGCCAAGTCGCTCGTCAACCTGCAGTTCAACTGGTACCTGACCGGCACCGTGGCGTTCGTCGTCGTGTTCTACCTCCAGATGGCCAAGCAGTACCAGGAGCAGCCGCAGTACGCTCCGCTGGTGAAGGGAGGGAGAGGCAGCGATGGCCGGTGCACCATCGGAGAGGTCAATGACGACGAGGATGACCTTGAGGCCTCCAAAGGAGGCTTAGGATATATCGAAATTGAGAGG
+>species2_10
+ATGGGCACTCTCGTCGGGCACGTCGCGCCGGGCGCCGGCTTCCTCCTCATCGGCCTGTGGCAGCTATTCAGCCACATCCGCCTGTTCCTGCTGCGCCCGAGCTCGTACTCTGCTCCGGTCTGGTTCCCGGCGCCGGGCGTGCGCCACCTCGAGCTCATACTCATCATCATCGGCGCGGCGATGTCCATCCTGATGGAGCTCGTCATCGGCCCCGCGAAGCACCAGCCGTTCGACGACGACGGCACCATCCCGTCAGACCACCTCCACAACTTCGAGCACGCGTCCATCTCGCTGGCGCTGCTCGTCTTCGCCGCGGTCACCATCCACCTCGACAGGGTAAAGGCGCCCCTGCGTGACGCCGTGTCGCAGCTCGTCGCCGCCGCGGCGTTCGCGCAGCAGCTGCTCATCTTCCACCTCCACTCGGCGGACCACATGGGCGTGGAGGGGCAGTTCCACTGGCTGCTGCAGACGGTCATCGCCGTCACGCTCGCCACCACGCTGCTCGGGATCCCTTACCCGCGGAGCATCGTGGTGAGCCTTGTCCGGTCGGCCAGCCTCGTGCTCCAGGGCGTCTGGTTCGTCGTCATGGGCGTCATGCTGTGGACGCCGGCGCTCATACCCAAGGGCTGCTTCCTCAACCTCGAGGAAGGGCACGACGTCGTCCGGTGCCGCACCGACGAGGCGCTCGACCGCGCCAAGTCGCTCGTCAACCTGCAGTTCAGCTGGTACCTCACCGGCACGGTGGTGTTCGTCGTCCTGTTCTACCTCCAGATGGCGAAGCTCTACCCCGAGGAGCCGCAGTATTTGCCGCTGGTGAAGGGAGGAGGCGGCGGCGGCGATGACCGCGATAGCCGGTTCAGCATCGGAGACGATGATCACGACGATGAGGACGATGTCGAGGCTGCAAAACGTGGCTTCGGACACGTGGTTAGCGGCACAAAGCCTGTCGAAATCGAGAGG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Wed Mar 01 10:29:54 2017 -0500
@@ -0,0 +1,6 @@
+<tables>
+    <!-- PlantTribes scaffolds table, loaded from tool-data/plant_tribes_scaffolds.loc. -->
+    <!-- Each entry has the columns value, name, path and description; lines starting with "#" are comments. -->
+    <table name="plant_tribes_scaffolds" comment_char="#">
+        <columns>value, name, path, description</columns>
+        <file path="tool-data/plant_tribes_scaffolds.loc" />
+    </table>
+</tables>