Mercurial > repos > greg > gene_family_classifier

--- a/gene_family_classifier.xml	Mon Jan 30 11:15:25 2017 -0500
+++ b/gene_family_classifier.xml	Mon Jan 30 14:48:51 2017 -0500
@@ -13,6 +13,7 @@
     </stdio>
     <command>
         <![CDATA[
+        	#set output_label = ""
             #if str($options_type.options_type_selector) == 'advanced':
                 #set create_orthogroup_cond = $options_type.create_orthogroup_cond
                 #set create_orthogroup = $create_orthogroup_cond.create_orthogroup
@@ -41,9 +42,11 @@

             GeneFamilyClassifier
             --proteins "$input"
+            --scaffold_dir "${GALAXY_DATA_INDEX_DIR}/plant_tribes/scaffolds"
             --scaffold "$scaffold"
             --method $method
             --classifier $save_hmmscan_log_cond.classifier
+            --num_threads \${GALAXY_SLOTS:-4}

             #if str($options_type.options_type_selector) == 'advanced':
                 --super_orthogroups $options_type.super_orthogroups
@@ -54,14 +57,14 @@
                 #if str($create_orthogroup) == 'yes':
                     --orthogroup_fasta
                     #if $create_corresponding_coding_sequences:
+                    	#set output_label = "Gene family clusters with corresponding coding sequences"
                         --coding_sequences "$create_corresponding_coding_sequences_cond.coding_sequences"
+                    #else:
+                    	#set output_label = "Gene family clusters"
                     #end if
                 #end if
             #end if

-            --scaffold_dir "${GALAXY_DATA_INDEX_DIR}/plant_tribes/scaffolds"
-            --num_threads \${GALAXY_SLOTS:-4}
-
             #if str($save_hmmscan_log_cond.classifier) == 'hmmscan' or str($save_hmmscan_log_cond.classifier) == 'both':
                 #if str($save_hmmscan_log_cond.save_hmmscan_log) == 'yes':
                     && mv geneFamilyClassification_dir/hmmscan.log $hmmscan_log
@@ -72,9 +75,9 @@

             #if $create_ortho_sequences:
                 #if $create_corresponding_coding_sequences:
-                    && echo "Sequences classified into precomputed orthologous plant gene family clusters with corresponding coding sequences: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output
+                    && echo "# Precomputed orthologous gene family clusters with corresponding coding sequences: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output
                 #else:
-                    && echo "Sequences classified into precomputed orthologous plant gene family clusters: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output
+                    && echo "# Precomputed orthologous gene family clusters: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output
                 #end if
                 && ls -al $orthogroups_fasta_src_dir | grep f >> $output
                 && mv $orthogroups_fasta_src_dir/* $dest_dir || true
@@ -161,23 +164,26 @@
         <data name="hmmscan_log" format="txt" label="Protein classification hmmscan.log on ${on_string}">
             <filter>save_hmmscan_log_cond['classifier'] in ['hmmscan', 'both'] and save_hmmscan_log_cond['save_hmmscan_log'] == 'yes'</filter>
         </data>
-        <data name="output" format="txt" label="Sequences classified into gene family clusters on ${on_string}">
+        <data name="output" format="txt" label="Gene family clusters on ${on_string}">
             <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes'</filter>
         </data>
-        <collection name="transcripts" type="list">
+        <collection name="orthos" type="list">
             <discover_datasets pattern="__name__" directory="geneFamilyClassification_dir" visible="false" ext="tabular" />
         </collection>
     </outputs>
     <tests>
         <test>
-            <param name="input" value="" ftype="fasta" />
-            <param name="prediction_method" value="transdecoder"/>
-            <param name="target_gene_family_assembly" value="no"/>
-            <param name="strand_specific" value="yes"/>
+            <!-- It is unclear how to test this tool, since it requires scaffolds data that is extremely large and is installed using a Data Manager. -->
+            <param name="input" value="transcripts.cleaned.nr.pep" ftype="fasta" />
+            <param name="scaffold" value="22Gv1.1"/>
+            <param name="method" value="orthomcl"/>
+            <param name="classifier" value="blastp"/>
             <param name="dereplicate" value="yes"/>
             <param name="min_length" value="200"/>
             <output_collection name="orthos" type="list">
-
+                <element name="proteins.blastp.22Gv1.1" file="proteins.blastp.22Gv1.1" ftype="tabular" compare="contains"/>
+                <element name="proteins.blastp.22Gv1.1.bestOrthos" file="proteins.blastp.22Gv1.1.bestOrthos" ftype="tabular" compare="contains"/>
+                <element name="proteins.blastp.22Gv1.1.bestOrthos.summary" file="proteins.blastp.22Gv1.1.bestOrthos.summary" ftype="tabular" compare="contains"/>
             </output_collection>
         </test>
     </tests>
@@ -187,7 +193,7 @@
 sequences into precomputed orthologous gene family clusters using either blastp (faster), HMMScan (slower but more sensitive
 to remote homologs) or both (more exhaustive).

-This tool accepts any of the following as input.
+This tool accepts any of the following as input:

 * the postprocessed assemblies produced by the **Postprocess de novo assembly transcripts into putative coding sequences** tool
 * externally predicted coding sequences and their corresponding amino acid translations derived from a transcriptome assembly