diff cd_hit_est.xml @ 11:75fde37f69e5

Add cd-hit to protein fastas
author Jim Johnson <jj@umn.edu>
date Thu, 27 Jun 2013 21:27:06 -0500
parents f0c20796d33a
children
line wrap: on
line diff
--- a/cd_hit_est.xml	Thu Sep 13 20:15:09 2012 -0500
+++ b/cd_hit_est.xml	Thu Jun 27 21:27:06 2013 -0500
@@ -1,10 +1,15 @@
-<tool id="cd_hit_est" name="CD-HIT-EST" version="1.1">
+<tool id="cd_hit_est" name="CD-HIT-EST" version="1.2">
  <description>Cluster a nucleotide dataset into representative sequences</description>
  <requirements>
   <requirement type="package" version="4.6.1">cd-hit</requirement>
  </requirements>
+  <macros>
+    <import>cdhit_macros.xml</import>
+  </macros>
  <command>
-  cd-hit-est -i $fasta_in -o rep_seq -c $similarity -n $wordsize $strand
+  cd-hit-est -i "$fasta_in" -o rep_seq -c $similarity -n $wordsize $strand
+  #include source=$common_cdhit_options#
+  #include source=$runtime_tuning#
  </command>
  <inputs>
   <param name="fasta_in" type="data" format="fasta" label="EST Sequences to cluster"/>
@@ -22,6 +27,8 @@
     <validator type="in_range" message="word size should be between 4 and 10" min="4" max="10"/>
   </param>
   <param name="strand" type="boolean" truevalue="-r 1" falsevalue="" checked="false" label="Compare both strands"/>
+  <expand macro="common_cdhit_options" />
+  <expand macro="runtime_tuning" />
  </inputs>
  <outputs>
   <data format="txt" name="clusters_out" label="${tool.name} on ${on_string}: clusters" from_work_dir="rep_seq.clstr"/>
@@ -29,12 +36,40 @@
  </outputs>
  <tests>
   <test>
+    <!-- Expect 3 clusters: 0,1,2 -->
     <param name="fasta_in" value="cd_hit_est_in.fa" />
     <param name="similarity" value="0.9"/>
     <param name="wordsize" value="8"/>
+    <param name="strand" value="true"/>
+    <!-- settings and tuning belong to conditionals supplied by the expanded macros -->
+    <param name="settings" value="no"/>
+    <param name="tuning" value="default"/>
     <output name="clusters_out">
         <assert_contents>
-            <has_text text=">Cluster" />
+            <has_text text=">Cluster 0" />
+            <!-- There should not be a Cluster 3 -->
+            <not_has_text text="Cluster 3" />
+            <has_text_matching expression="F12Fcsw_481739" />
+        </assert_contents>
+    </output>
+    <output name="fasta_out">
+        <assert_contents>
+            <has_text_matching expression="^>[MF]\d\dFcsw_\d*" />
+        </assert_contents>
+    </output>
+  </test>
+  <test>
+    <!-- tighter constraints should yield more clusters -->
+    <param name="fasta_in" value="cd_hit_est_in.fa" />
+    <param name="similarity" value="0.95"/>
+    <param name="wordsize" value="9"/>
+    <param name="strand" value="true"/>
+    <!-- settings and tuning belong to conditionals supplied by the expanded macros -->
+    <param name="settings" value="no"/>
+    <param name="tuning" value="default"/>
+    <output name="clusters_out">
+        <assert_contents>
+            <has_text text=">Cluster 4" />
             <has_text_matching expression=">F12Fcsw_481739" />
         </assert_contents>
     </output>