diff fasta_compute_length.xml @ 2:d75972d4bd2a draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit 6e148b31fed1b322ce720804d8525088ec6d43f9
author devteam
date Thu, 29 Oct 2015 22:14:42 -0400
parents 2811169ce62b
children 19caae8fd9d4
line wrap: on
line diff
--- a/fasta_compute_length.xml	Tue Oct 13 12:19:36 2015 -0400
+++ b/fasta_compute_length.xml	Thu Oct 29 22:14:42 2015 -0400
@@ -1,51 +1,78 @@
-<tool id="fasta_compute_length" name="Compute sequence length" version="1.0.0">
-	<description></description>
-	<command interpreter="python">fasta_compute_length.py $input $output $keep_first</command>
-	<inputs>
-		<param name="input" type="data" format="fasta" label="Compute length for these sequences"/>
-		<param name="keep_first" type="integer" value="0" label="How many title characters to keep?" help="'0' = keep the whole thing"/>
-	</inputs>
-	<outputs>
-		<data name="output" format="tabular"/>
-	</outputs>
-	<tests>
-		<test>
-			<param name="input" value="454.fasta" />
-			<param name="keep_first" value="0"/>
-			<output name="output" file="fasta_tool_compute_length_1.out" />
-		</test>
-		
-		<test>
-			<param name="input" value="extract_genomic_dna_out1.fasta" />
-			<param name="keep_first" value="0"/>
-			<output name="output" file="fasta_tool_compute_length_2.out" />
-		</test>
-		
-		<test>
-			<param name="input" value="454.fasta" />
-			<param name="keep_first" value="14"/>
-			<output name="output" file="fasta_tool_compute_length_3.out" />
-		</test>
-	</tests>
-	<help>
+<tool id="fasta_compute_length" name="Compute sequence length" version="1.0.1">
+    <description></description>
+    <command interpreter="python">fasta_compute_length.py $input $output $keep_first $keep_first_word</command>
+    <inputs>
+        <param name="input" type="data" format="fasta" label="Compute length for these sequences"/>
+        <param name="keep_first" type="integer" value="0" label="How many title characters to keep?" help="'0' = keep the whole thing"/>
+        <param name="keep_first_word" type="boolean" truevalue="id_only" falsevalue="id_and_desc"
+            checked="false" label="Strip fasta description from header?"
+            help="Stripping the description will truncate the fasta header to just the sequence ID. Otherwise the header description will be kept. This step is done before the 'How many characters to keep' option."/>
+
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="454.fasta" />
+            <param name="keep_first" value="0"/>
+            <param name="keep_first_word" value="id_and_desc" />
+            <output name="output" file="fasta_tool_compute_length_1.out" />
+        </test>
+
+        <test>
+            <param name="input" value="extract_genomic_dna_out1.fasta" />
+            <param name="keep_first" value="0"/>
+            <param name="keep_first_word" value="id_and_desc" />
+            <output name="output" file="fasta_tool_compute_length_2.out" />
+        </test>
+
+        <test>
+            <param name="input" value="454.fasta" />
+            <param name="keep_first" value="14"/>
+            <param name="keep_first_word" value="id_and_desc" />
+            <output name="output" file="fasta_tool_compute_length_3.out" />
+        </test>
+    </tests>
+    <help>
 
 **What it does**
 
-This tool counts the length of each fasta sequence in the file. The output file has two columns per line (separated by tab): fasta titles and lengths of the sequences. The option *How many characters to keep?* allows to select a specified number of letters from the beginning of each FASTA entry. 
+This tool computes the length of each FASTA sequence in the file. The output file has two tab-separated columns per line: the FASTA title and the length of the sequence. The option *How many characters to keep?* lets you keep only the specified number of characters from the beginning of each FASTA title.
 
------	
+-----
 
 **Example**
 
 Suppose you have the following FASTA formatted sequences from a Roche (454) FLX sequencing run::
 
-    &gt;EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_
    TCCGCGCCGAGCATGCCCATCTTGGATTCCGGCGCGATGACCATCGCCCGCTCCACCACG
    TTCGGCCGGCCCTTCTCGTCGAGGAATGACACCAGCGCTTCGCCCACG
    &gt;EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_
    AATAAAACTAAATCAGCAAAGACTGGCAAATACTCACAGGCTTATACAATACAAATGTAAfa
+    &gt;EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_
+    TCCGCGCCGAGCATGCCCATCTTGGATTCCGGCGCGATGACCATCGCCCGCTCCACCACG
+    TTCGGCCGGCCCTTCTCGTCGAGGAATGACACCAGCGCTTCGCCCACG
+    &gt;EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_
+    AATAAAACTAAATCAGCAAAGACTGGCAAATACTCACAGGCTTATACAATACAAATGTAAfa
 
 Running this tool while setting **How many characters to keep?** to **14** will produce this::
-	
-	EYKX4VC02EQLO5  108
-	EYKX4VC02D4GS2	 60
+
+    EYKX4VC02EQLO5  108
+    EYKX4VC02D4GS2   60
+
+However, if your IDs are not all the same length, you may wish to keep just the FASTA ID and drop the description. For example, given the following sequences::
+
+    &gt;EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_
+    TCCGCGCCGAGCATGCCCATCTTGGATTCCGGCGCGATGACCATCGCCCGCTCCACCACG
+    TTCGGCCGGCCCTTCTCGTCGAGGAATGACACCAGCGCTTCGCCCACG
+    &gt;EYKX4VC length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_
+    AATAAAACTAAATCAGCAAAGACTGGCAAATACTCACAGGCTTATACAATACAAATGTAAfa
+
+Running this tool with **Strip fasta description from header?** set to **True** and **How many characters to keep?** set to **0** will produce this::
+
+    EYKX4VC02EQLO5  108
+    EYKX4VC     60
 
 
-	</help>
+    </help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btq281</citation>
+    </citations>
 </tool>