Mercurial > repos > jjohnson > cdhit
diff cd_hit_est.xml @ 11:75fde37f69e5
Add cd-hit to protein fastas
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Thu, 27 Jun 2013 21:27:06 -0500 |
parents | f0c20796d33a |
children |
line wrap: on
line diff
--- a/cd_hit_est.xml	Thu Sep 13 20:15:09 2012 -0500
+++ b/cd_hit_est.xml	Thu Jun 27 21:27:06 2013 -0500
@@ -1,10 +1,15 @@
-<tool id="cd_hit_est" name="CD-HIT-EST" version="1.1">
+<tool id="cd_hit_est" name="CD-HIT-EST" version="1.2">
     <description>Cluster a nucleotide dataset into representative sequences</description>
     <requirements>
         <requirement type="package" version="4.6.1">cd-hit</requirement>
     </requirements>
+    <macros>
+        <import>cdhit_macros.xml</import>
+    </macros>
     <command>
-    cd-hit-est -i $fasta_in -o rep_seq -c $similarity -n $wordsize $strand
+    cd-hit-est -i "$fasta_in" -o rep_seq -c $similarity -n $wordsize $strand
+    #include source=$common_cdhit_options#
+    #include source=$runtime_tuning#
     </command>
     <inputs>
         <param name="fasta_in" type="data" format="fasta" label="EST Sequences to cluster"/>
@@ -22,6 +27,8 @@
             <validator type="in_range" message="word size should be between 4 and 10" min="4" max="10"/>
         </param>
         <param name="strand" type="boolean" truevalue="-r 1" falsevalue="" checked="false" label="Compare both strands"/>
+        <expand macro="common_cdhit_options" />
+        <expand macro="runtime_tuning" />
     </inputs>
     <outputs>
         <data format="txt" name="clusters_out" label="${tool.name} on ${on_string}: clusters" from_work_dir="rep_seq.clstr"/>
@@ -29,12 +36,40 @@
     </outputs>
     <tests>
         <test>
+            <!-- Expect 3 clusters: 0,1,2 -->
             <param name="fasta_in" value="cd_hit_est_in.fa" />
             <param name="similarity" value="0.9"/>
             <param name="wordsize" value="8"/>
+            <param name="strand" value="true"/>
+            <!-- conditionals in macros -->
+            <param name="settings" value="no"/>
+            <param name="tuning" value="default"/>
             <output name="clusters_out">
                 <assert_contents>
-                    <has_text text=">Cluster" />
+                    <has_text text=">Cluster 0" />
+                    <!-- There should not be a Cluster 3 -->
+                    <not_has_text text="Cluster 3" />
+                    <has_text_matching expression="F12Fcsw_481739" />
+                </assert_contents>
+            </output>
+            <output name="fasta_out">
+                <assert_contents>
+                    <has_text_matching expression="^>[MF]\d\dFcsw_\d*" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <!-- tighter constraints should yield more clusters -->
+            <param name="fasta_in" value="cd_hit_est_in.fa" />
+            <param name="similarity" value="0.95"/>
+            <param name="wordsize" value="9"/>
+            <param name="strand" value="true"/>
+            <!-- conditionals in macros -->
+            <param name="settings" value="no"/>
+            <param name="tuning" value="default"/>
+            <output name="clusters_out">
+                <assert_contents>
+                    <has_text text=">Cluster 4" />
                     <has_text_matching expression=">F12Fcsw_481739" />
                 </assert_contents>
             </output>