changeset 0:315440c6fb44 draft

Uploaded
author greg
date Fri, 09 Dec 2016 08:45:36 -0500
parents
children 8b5a8a3ca9ee
files gene_family_classifier.xml plant_tribes_scaffolds.loc.sample test-data/assembly.fasta test-data/transcripts.cleaned.nr.pep tool_data_table_conf.xml.sample
diffstat 5 files changed, 222 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_family_classifier.xml	Fri Dec 09 08:45:36 2016 -0500
@@ -0,0 +1,132 @@
+<tool id="plant_tribes_gene_family_classifier" name="PlantTribes GeneFamilyClassifier" version="1.0.0">
+    <description>pipeline</description>
+    <requirements> <!-- Package dependency declared for this tool wrapper. -->
+        <requirement type="package" version="0.2">plant_tribes_gene_family_classifier</requirement>
+    </requirements>
+    <stdio> <!-- Rules used to decide when a job run is reported as an error. -->
+        <!-- Any exit code of 1 or greater is treated as an error -->
+        <exit_code range="1:" />
+        <!-- In case the return code has not been set properly, also look for these error markers in the tool output -->
+        <regex match="Error:" />
+        <regex match="Exception:" />
+    </stdio>
+    <command>
+        <![CDATA[
+            GeneFamilyClassifier
+            --proteins "$input"
+            --scaffold "$scaffold"
+            --method $method
+            --classifier $classifier
+            #if str($options_type.options_type_selector) == 'advanced':
+                --super_orthogroups $options_type.super_orthogroups
+                #if str($options_type.single_copy_cond.single_copy) == 'taxa':
+                    --single_copy_taxa $options_type.single_copy_cond.single_copy_taxa
+                    --taxa_present $options_type.single_copy_cond.taxa_present
+                #end if
+                #if str($options_type.create_orthogroup_cond.create_orthogroup) == 'yes':
+                    --orthogroup_fasta
+                    --coding_sequences "$options_type.create_orthogroup_cond.coding_sequences"
+                #end if
+            #end if
+            --num_threads \${GALAXY_SLOTS:-4}
+        ]]>
+    </command> <!-- Each #if above tests a conditional's selector param (e.g. single_copy_cond.single_copy), not the conditional container itself. -->
+    <inputs>
+        <param name="input" format="fasta" type="data" label="Amino acids (proteins) sequences fasta file"/> <!-- Passed to the command line as the proteins argument. -->
+        <param name="scaffold" type="select" label="Orthogroups or gene families proteins scaffold"> <!-- Choices come from the plant_tribes_scaffolds data table; validation fails when it has no entries. -->
+            <options from_data_table="plant_tribes_scaffolds" />
+            <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool to install and populate the PlantTribes scaffolds data table."/>
+        </param>
+        <param name="method" type="select" label="Protein clustering method"> <!-- Selected value is passed as the method argument; gfam is the default. -->
+            <option value="gfam" selected="true">GFam</option>
+            <option value="orthofinder">OrthoFinder</option>
+            <option value="orthomcl">OrthoMCL</option>
+        </param>
+        <param name="classifier" type="select" label="Protein classification method"> <!-- Selected value is passed as the classifier argument; blastp is the default. -->
+            <option value="blastp" selected="true">blastp</option>
+            <option value="hmmscan">HMMScan</option>
+            <option value="both">Both blastp and HMMScan</option>
+        </param>
+        <conditional name="options_type"> <!-- Basic adds no extra command line arguments; Advanced exposes the options below. -->
+            <param name="options_type_selector" type="select" label="Options Configuration">
+                <option value="basic" selected="true">Basic</option>
+                <option value="advanced">Advanced</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+                <param name="super_orthogroups" type="select" label="SuperOrthogroups MCL clustering" help="blastp e-value matrix between all pairs of orthogroups">
+                    <option value="min_evalue" selected="true">Minimum e-value</option>
+                    <option value="avg_evalue">Average e-value</option>
+                </param>
+                <conditional name="single_copy_cond"> <!-- The taxa choice exposes two integer thresholds; the custom choice has no extra inputs. -->
+                    <param name="single_copy" type="select" label="Select single copy configuration">
+                        <option value="custom" selected="true">Single copy orthogroup custom</option>
+                        <option value="taxa">Minimum single copy taxa required in orthogroup</option>
+                    </param>
+                    <when value="custom" />
+                    <when value="taxa">
+                        <param name="single_copy_taxa" type="integer" value="20" label="Minimum single copy taxa required in orthogroup"/>
+                        <param name="taxa_present" type="integer" value="21" label="Minimum taxa required in single copy orthogroup"/>
+                    </when>
+                </conditional>
+                <conditional name="create_orthogroup_cond"> <!-- Choosing yes additionally asks for the matching CDS fasta file. -->
+                    <param name="create_orthogroup" type="select" label="Create orthogroup fasta files?">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no" />
+                    <when value="yes">
+                        <param name="coding_sequences" format="fasta" type="data" label="Corresponding coding sequences (CDS) fasta file"/>
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs> <!-- Single output: a list collection filled from files found in a job directory. -->
+        <collection name="transcripts" type="list"> <!-- NOTE(review): the name and the assemblyPostProcessing_dir directory look carried over from an assembly post-processing tool; confirm where GeneFamilyClassifier writes its results. -->
+            <discover_datasets pattern="__name__" directory="assemblyPostProcessing_dir" visible="false" ext="fasta" />
+        </collection>
+    </outputs>
+    <tests>
+        <test> <!-- Uses the protein fasta shipped in test-data and only params this tool declares. -->
+            <param name="input" value="transcripts.cleaned.nr.pep" ftype="fasta" />
+            <param name="scaffold" value="22Gv1.1"/> <!-- NOTE(review): needs a 22Gv1.1 entry in the plant_tribes_scaffolds data table; confirm for the test setup. -->
+            <param name="method" value="gfam"/>
+            <param name="classifier" value="blastp"/>
+            <param name="options_type_selector" value="basic"/>
+            <!-- NOTE(review): no element expectations are listed yet; add them once the expected output files are known. -->
+            <output_collection name="transcripts" type="list">
+
+            </output_collection>
+        </test>
+    </tests>
+    <help>
+This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of
+complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. It performs Gene family
+classification of the post processed de novo transcripts using either blastp (faster), HMMScan (slower but more sensitive to remote
+homologs) or both (more exhaustive).
+
+-----
+
+**Options**
+
+ * **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data.
+ * **Protein clustering method** - One of GFam, OrthoFinder or OrthoMCL.
+ * **Protein classification method** - One of blastp, HMMScan or both.
+ * **SuperOrthogroups MCL clustering** - blastp e-value matrix between all pairs of orthogroups.
+ * **Minimum single copy taxa required in orthogroup** - Used with "Minimum single copy taxa required in orthogroup" configuration only.
+ * **Minimum taxa required in single copy orthogroup** - Used with "Minimum single copy taxa required in orthogroup" configuration only.
+ * **Corresponding coding sequences (CDS) fasta file** - Used only when selecting "Create orthogroup fasta files?".
+ 
+    </help>
+    <citations> <!-- NOTE(review): placeholder BibTeX entry with every field set to None; replace with the real reference before release. -->
+            <citation type="bibtex">
+                @unpublished{None,
+                author = {None},
+                title = {None},
+                year = {None},
+                eprint = {None},
+                url = {None}
+            }</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plant_tribes_scaffolds.loc.sample	Fri Dec 09 08:45:36 2016 -0500
@@ -0,0 +1,4 @@
+## Plant Tribes scaffolds
+#Value	Name	Path	Description
+#22Gv1.0	22Gv1.0	/plant_tribes/scaffolds/22Gv1.0	22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0)
+#22Gv1.1	22Gv1.1	/plant_tribes/scaffolds/22Gv1.1	22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/assembly.fasta	Fri Dec 09 08:45:36 2016 -0500
@@ -0,0 +1,73 @@
+>contig_1
+CGATTAACCATGTGTACAACATGACCAATTATGGCCGATCTTGCTTGTTCAGGAGTCTCA
+CTCATCATTCCAAAATCAAGAAAAGCAAGCTTCCCGTCAGGTGTAGCTAAGAGATTCCCT
+GGGTGAGGATCTGCATGAAAATAGCCGTACTCAAGCAGCTGTCGAAGACTGCACTGTATG
+CCGGTATTCACCAGATCCAGAACACTGAGCCCTTGACTCTCAACGGCATCTTGCTCATTT
+AATTTAACACCTTCAACCCACTCCATTGTTAGCACCTTTCCACTCGTGTAATCCCAGAAA
+ATATCTGGGACAAGGATATCTTCCTTGTCTCCATATAATTTTTTAA
+>contig_2
+CCTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCC
+GGAAGATGCAGCAGACGACTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCAT
+ACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCATGTTGCTTGGAATGGGGCC
+CGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATG
+GGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCC
+GATGATCAACTCGGCTTCGGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTAC
+AACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGGGTGAGACTCCTTTGCCTTG
+TACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACA
+ACTGTTAAATTATGTGCGCTTTGATGATTAAAAACACGGGGTAATTGAACTCAAGGGATG
+GTTGAGAATTGTAATATTTCTGTAAGTGTCGGGGGATGGTTGAAATGCTTTTTATTATGA
+AGTGTTAAAATGTAAGATAAA
+>contig_3
+CGGGGGCAAGGTTTACATGAAAGTACATCTACAGGACCTAGATAAGCATCATCACCTGAC
+AAAAAGCCATACATAGTAACACTTGCTAGTTGATCTGTGAAAATGGTGCACGAGTCGTAA
+CAGACGGCATTAGTTCCTTTCCTGTAACAGCAGATTCGTATGAATGGTTTTCTCGAAATT
+CTTCTTCAAGTGCATCTATAGGCATGGCACGAAGTGACTCTATTGTGCCTTTGCTGGGAA
+TATCCGGCTCGCTCCTAACTGGCGTGGACCCTGTAGGCTCGTAATCCATGTATTTTTGCC
+TGAAAGTATCATTCGTGTGCTGTTCGATACAGGATACCTGCTGGGAATGATCTCTCTTAA
+GGTTCTCAATTGTTTCTGAATGAGCTCTAGCAGTTGTCAGAATTTCAGAAACAGATGCCT
+TCTCCTGCTCTGACAAGCCAGCAACAACAACTCCTTCATCCACA
+>contig_4
+CTGACGATGTTCATATTCATGCCACTCTAAATGTATGCCATCCATGTTGAGGAGAAATGC
+TTGTGTAAAGAAGAAACTGGAGTCAAGCGGTCCCGGTTTTACAGTTGAACGTTGCTCTAA
+ATTAGTCAAAGTACCCGACTACCCGCTCATCAAACTGGCGAGCTCGTTTTCCTCGCCGCC
+CTACTCGGTGTCTTGGCGGGGCACATGGGGGTGGCGTGTGGCGTGCGGGGATGCGACAAC
+ATCATAAATTCATAATCGAAGG
+>contig_5
+GTGAGAGTGTCGCCTTTTCCATGCTACCCCTTTCCAGATCTGACTTGTTCGAACCTTCCA
+TCATCCGGTTCGACGTCAGCAGTCGGTCCTTCTTCTCCAAGCACGAGAGCAGACGGTGCG
+AGAGCGAGCAAAGACTCGCCTGCCGCGCCTCTCGTGACCTGGCTCGCATCTCCAGGATTC
+ATGGCTCCTTAGCTCGCCCTTCTTCGATACTTCGCTAGACGCCCGAGGACGTCATCGAAG
+GCGAGGGTCGGCACAACGGAGTGCTCCTTGCATGATGTCGGGGCCTGGCATCACGCGGCA
+TCGGCCAAATCGTCCCTCTCGACGTGCGTCACGGAGAGGGCCGAGCGCCGGGACGACCGC
+GTGGAAGAGCCCGCGGGATGCGGGATCCCGCTTGACGTGTGTTTGTGGCGGACCACCG
+>contig_6
+TATAACAATTCAAAATATTCCTATGAAAACCAAGACCAGAGGCAACACCGGTATAACCAA
+AGCCAGAGGCAATATCCCTTTATTCTTCGGGAACAGAAAAATACACGATATGCTACAGAC
+AAAAAAATAAATCCAAACCATTCTTCTGTGCATAAACATTGTAAAAGTTTATTAGACCGC
+TGTAAAACTCGTAAATGAAATCCCTGGCAATTGAATCTGTTAAACCTGCTCC
+>contig_7
+GAGAATGAGTGGTCTGGGGCTGAGTTTTTGAATGAAATGGCGGCAATGATGACTCAAAAT
+AAATCCAATGAAAACGGAACCGGAACTTTTGAAGAACTGCAACAATTGTTCGATGAAATG
+TTTCAGAGCGACATCGAGTCCTTCAATGGTTGTTCTTCATCATCCAATGAAACATGTAGC
+AACTCGAACAAGAGGAATTCCATTGAGTCGAGCTCGGCTAATTTCAGACCCGAAAATGCG
+AAACGAAAGCGGCGAGATTAGCGGGAAGAAGAATACTAGGAAAGGTAAAGGTGACG
+>contig_8
+TGGCATGAAGTCGGCTCGACTCGTGCCCGCTCGTCTATAGACGAATTGAGCTAGATTCAC
+TCATTCCATGAACTCGACTCGTTCATGAGTCGGCTCGTAGTTGTCCGGACTCGCTCCATG
+GCCAGCTCTACAACAGACTGCCTGTATGTAGATGATTGTATTGATTTGTTCTTCTCTTGT
+TTAAATCCAACCAAATATCAACATGATATTTGCAATTTC
+>contig_9
+ACTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTT
+TGATGAAATTATGGAGCATGTGGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCC
+TGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAAAACTGAGAAGCGAGGGTAG
+AATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAAC
+AGCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAA
+ACAGAACATCTACCTCGCTGAACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGC
+ACCTATGACTCAATATTTTGTCTTTGAGGCGGTTGA
+>contig_10
+GGGGCATTGCCGCCGATTAATTCGAGCGCGAGGTTTCGCGTATCAGGCGGAATTGGGATT
+TGGCGCCACAAAGAGCGCCACCTATGATTTCTGTGGCGGACGACTTCACGAACTCGGACG
+GGACTATCTAACCAATCTTCCAAGTTCATTGCGACATCAGTATAAGGGCGTGATGAAGGT
+TCGCTATCGTCGCTTGAGTTATTGATGGGGCCCAAATTGAGATCGAGGTTCATTGTAGTG
+GTGTTCTCTTCTGCCATTTGAGAATTTCACAAGTTCTAACAAACGAAAACGCAAATCTTC
+GGGACTAATATGCAGAATTTCCCTAAATAGAAGGGGTTTAAAA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.cleaned.nr.pep	Fri Dec 09 08:45:36 2016 -0500
@@ -0,0 +1,7 @@
+>contig_2
+LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKW
+VTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQ
+LLNYVRFDD
+>contig_9
+LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRT
+AHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Dec 09 08:45:36 2016 -0500
@@ -0,0 +1,6 @@
+<tables>
+    <table name="plant_tribes_scaffolds" comment_char="#"> <!-- Data table named by the scaffold select of the gene family classifier tool; lines starting with # in the loc file are comments. -->
+        <columns>value, name, path, description</columns>
+        <file path="tool-data/plant_tribes_scaffolds.loc" />
+    </table>
+</tables>