diff abyss-pe.xml @ 6:1e19058d6c0a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/abyss commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
author iuc
date Sun, 21 Jun 2015 22:50:20 -0400
parents bb97dd7d11d5
children 51ca82f3ffaf
line wrap: on
line diff
--- a/abyss-pe.xml	Mon May 04 22:47:42 2015 -0400
+++ b/abyss-pe.xml	Sun Jun 21 22:50:20 2015 -0400
@@ -1,64 +1,244 @@
-<tool id="abyss-pe" name="Abyss Paired-End" version="1.0.0">
-    <description>Assemble short paired reads</description>
+<tool id="abyss-pe" name="ABySS" version="1.9.0.0">
+    <description>de novo sequence assembler</description>
     <macros>
-        <import>macros.xml</import>
+        <xml name="reads_conditional">
+            <conditional name="reads">
+                <param name="reads_selector" type="select" label="Type of paired-end datasets">
+                    <option value="paired">2 separate datasets</option>
+                    <option value="paired_collection">1 paired dataset collection</option>
+                    <option value="paired_il">1 dataset of interleaved reads</option>
+                </param>
+                <when value="paired">
+                    <param name="reads1" type="data" format="fasta,fastq,fastqsanger,fastqillumina" label="Paired-end reads 1" />
+                    <param name="reads2" type="data" format="fasta,fastq,fastqsanger,fastqillumina" label="Paired-end reads 2" />
+                </when>
+                <when value="paired_collection">
+                    <param name="reads_coll" type="data_collection" collection_type="paired" format="fasta,fastq,fastqsanger,fastqillumina" label="Paired-end reads collection" />
+                </when>
+                <when value="paired_il">
+                    <param name="reads_il" type="data" format="fasta,fastq,fastqsanger,fastqillumina" label="Interleaved paired-end reads" />
+                </when>
+            </conditional>
+        </xml>
     </macros>
-    <expand macro="requirements" />
-    <expand macro="stdio" />
-    <command interpreter='perl'>abyss-pe_wrapper.pl
-$k
-$n
-$outfile.extra_files_path
-$outfile
-$contigs_outfile
-$sam_outfile
-$coverage_histogram_outfile
-#for $i in $infiles
-${i.infile}
-#end for
+    <requirements>
+        <requirement type="package" version="1.9.0">abyss</requirement>
+    </requirements>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <!-- In case the return code has not been set propery check stderr too -->
+        <regex match="Error:" />
+        <regex match="Exception:" />
+    </stdio>
+    <version_command>ABYSS --version | head -n 1</version_command>
+    <command>
+abyss-pe name=abyss j=\${GALAXY_SLOTS:-1} k=$k
+#if str($K)
+  K=$K
+#end if
+#if str($q)
+  q=$q
+#end if
+#if str($Q)
+  Q=$Q
+#end if
+#if str($e)
+  e=$e
+#end if
+#if str($E)
+  E=$E
+#end if
+#if str($t)
+  t=$t
+#end if
+#if str($c)
+  c=$c
+#end if
+#if str($b)
+  b=$b
+#end if
+#if $SS
+  SS=--SS
+#end if
+#if str($m)
+  m=$m
+#end if
+#if str($p)
+  p=$p
+#end if
+#if str($a)
+  a=$a
+#end if
+#if str($l)
+  l=$l
+#end if
+#if str($s)
+  s=$s
+#end if
+#if str($n)
+  n=$n
+#end if
+#if str($d)
+  d=$d
+#end if
+#if str($S)
+  S=$S
+#end if
+#if str($N)
+  N=$N
+#end if
+#if $lib_repeat
+  lib='
+  #for $i in range(len($lib_repeat))
+    lib$i
+  #end for
+  '
+  #for $i, $v in enumerate($lib_repeat)
+    #if $v.reads.reads_selector == 'paired'
+      lib${i}='${v.reads.reads1} ${v.reads.reads2}'
+    #elif $v.reads.reads_selector == 'paired_collection'
+      lib${i}='${v.reads.reads_coll.forward} ${v.reads.reads_coll.reverse}'
+    #else
+      lib${i}='${v.reads.reads_il}'
+    #end if
+  #end for
+#end if
+#if str($se_reads) != 'None'
+  se='
+  #for $v in $se_reads
+    $v
+  #end for
+  '
+#end if
+#if $mp_repeat
+  mp='
+  #for $i in range(len($mp_repeat))
+    mp$i
+  #end for
+  '
+  #for $i, $v in enumerate($mp_repeat)
+    #if $v.reads.reads_selector == 'paired'
+      mp${i}='${v.reads.reads1} ${v.reads.reads2}'
+    #elif $v.reads.reads_selector == 'paired_collection'
+      mp${i}='${v.reads.reads_coll.forward} ${v.reads.reads_coll.reverse}'
+    #else
+      mp${i}='${v.reads.reads_il}'
+    #end if
+  #end for
+#end if
+#if str($long_seqs) != 'None'
+  long='
+  #for $v in $long_seqs
+    $v
+  #end for
+  '
+#end if
     </command>
 
     <inputs>
-        <repeat name="infiles" title="Paired Reads Files">
-            <param name="infile" type="data" format="fasta,fastq,fastqsanger,fastqillumina" label="Paired read sequences"/>
+        <repeat name="lib_repeat" title="Paired-end library" help="(lib)">
+            <expand macro="reads_conditional" />
+        </repeat>
+        <param name="se_reads" type="data" multiple="true" optional="true" format="fasta,fastq,fastqsanger,fastqillumina" label="Single-end reads" help="(se)" />
+        <repeat name="mp_repeat" title="Mate-pair library" help="The mate-pair libraries are used only for scaffolding and do not contribute towards the consensus sequence (mp)">
+            <expand macro="reads_conditional" />
         </repeat>
-        <param name="k" type="integer" value="41" label="[-k] K-mer size" help="Try multiple sizes, starting around 2/3 read length" />
-        <param name="n" type="integer" value="10" label="[-n] Min. num. pairs for scaffolding" help="Requirement for joining contigs into a scaffold" />
+        <param name="long_seqs" type="data" multiple="true" optional="true" format="fasta,fastq,fastqsanger,fastqillumina" label="Long sequences" help="Long sequence libraries are used only for rescaffolding and do not contribute towards the consensus sequence (long)" />
+        <param name="k" type="integer" value="41" label="K-mer length (in bp)" help="The length of a k-mer (when -K is not set) or the span of a k-mer pair (when K is set) (k)" />
+        <param name="K" type="integer" value="" optional="true" label="The length (in bp) of a single k-mer in a k-mer pair" help="Optional (K)" />
+        <param name="q" type="integer" value="3" max="40" optional="true" label="Minimum base quality when trimming" help="Trim bases from the ends of reads whose quality is less than this value (q)" />
+        <param name="Q" type="integer" value="0" max="40" optional="true" label="Minimum base quality" help="Mask all bases of reads whose quality is less than this value as 'N' (Q)" />
+        <param name="e" type="integer" value="" min="0" optional="true" label="Minimum erosion k-mer coverage" help="Erode bases at the ends of blunt contigs with coverage less than this value. Defaults to round(sqrt(median)) (e)" />
+        <param name="E" type="integer" value="" min="0" optional="true" label="Minimum erosion k-mer coverage per strand" help="Erode bases at the ends of blunt contigs with coverage less than this value on either strand. Defaults to 1 if sqrt(median) &gt; 2 else 0 (E)" />
+        <param name="t" type="integer" value="" optional="true" label="Maximum length of blunt contigs to trim" help="Defaults to the value of k (t)" />
+        <param name="c" type="float" value="" optional="true" label="Minimum mean k-mer coverage of a unitig" help="Defaults to sqrt(median) (c)" />
+        <param name="b" type="integer" value="" optional="true" label="Maximum length of a bubble (in bp)" help="abyss-pe has two bubble popping stages. The default limits are 3*k bp for ABYSS and 10000 bp for PopBubbles (b)" />
+        <param name="SS" type="boolean" label="Assemble in strand-specific mode" help="Requires that all libraries are strand-specific RNA-Seq libraries. Assumes that the first read in a read pair is reveresed with respect to the transcripts sequenced (SS)" />
+        <param name="m" type="integer" value="50" min="0" optional="true" label="Minimum overlap of two unitigs (in bp)" help="(m)" />
+        <param name="p" type="float" value="0.9" optional="true" label="Minimum sequence identity of a bubble" help="(p)" />
+        <param name="a" type="integer" value="2" min="0" optional="true" label="Maximum number of branches of a bubble" help="(a)" />
+        <param name="l" type="integer" value="" min="1"  optional="true" label="Minimum alignment length of a read (in bp)" help="Defaults to the value of k (l)" />
+        <param name="s" type="integer" value="200" min="0" optional="true" label="Minimum unitig length required for building contigs (in bp)" help="The seed length should be at least twice the value of k. If more sequence is assembled than the expected genome size, try increasing this value (s)" />
+        <param name="n" type="integer" value="10" min="0" optional="true" label="Minimum number of pairs required for building contigs" help="(n)" />
+        <param name="d" type="integer" value="6" min="0" optional="true" label="Allowable error of a distance estimate (in bp)" help="(d)" />
+        <param name="S" type="integer" value="" min="0" optional="true" label="Minimum contig length required for building scaffolds (in bp)" help="Defaults to the value of s (S)" />
+        <param name="N" type="integer" value="" min="0" optional="true" label="Minimum number of pairs required for building scaffolds" help="Defaults to the value of n (N)" />
     </inputs>
 
     <outputs>
-        <data name="outfile" format="txt" />
-        <data name="contigs_outfile" format="fasta" label="Contigs" />
-        <data name="sam_outfile" format="sam" label="Read aligments (Sam)" />
-        <data name="coverage_histogram_outfile" format="txt" label="Coverage histogram" />
+        <data name="unitigs" format="fasta" label="${tool.name} on ${on_string}: unitigs" from_work_dir="abyss-unitigs.fa" />
+        <data name="contigs_outfile" format="fasta" label="${tool.name} on ${on_string}: contigs" from_work_dir="abyss-contigs.fa">
+            <filter>lib_repeat</filter>
+        </data>
+        <data name="scaffolds" format="fasta" label="${tool.name} on ${on_string}: scaffolds" from_work_dir="abyss-scaffolds.fa">
+            <filter>lib_repeat or mp_repeat</filter>
+        </data>
+        <data name="bubbles" format="fasta" label="${tool.name} on ${on_string}: bubbles" from_work_dir="abyss-bubbles.fa" />
+        <data name="indels" format="fasta" label="${tool.name} on ${on_string}: indels" from_work_dir="abyss-indel.fa" />
+        <data name="coverage_histogram_outfile" format="tabular" label="${tool.name} on ${on_string}: coverage histogram" from_work_dir="coverage.hist" />
+        <data name="stats" format="tabular" label="${tool.name} on ${on_string}: stats" from_work_dir="abyss-stats.tab" />
     </outputs>
 
+    <tests>
+        <test>
+            <repeat name="lib_repeat">
+                <conditional name="reads">
+                    <param name="reads_selector" value="paired" />
+                    <param name="reads1" ftype="fastqsanger" value="assembly_sample-R1.fastq" />
+                    <param name="reads2" ftype="fastqsanger" value="assembly_sample-R2.fastq" />
+                </conditional>
+            </repeat>
+            <param name="k" value="50" />
+            <output name="unitigs" file="abyss-unitigs1.fa" />
+            <output name="contigs_outfile" file="abyss-contigs1.fa" />
+            <output name="scaffolds" file="abyss-scaffolds1.fa" />
+            <output name="bubbles" file="empty_file.fasta" />
+            <output name="indels" file="empty_file.fasta" />
+            <output name="coverage_histogram_outfile" file="coverage1.hist" />
+            <output name="stats" file="abyss-stats1.tab" />
+        </test>
+        <test>
+            <param name="se_reads" ftype="fastqsanger" value="assembly_sample-R1.fastq,assembly_sample-R2.fastq" />
+            <param name="k" value="50" />
+            <output name="unitigs" file="abyss-unitigs2.fa" />
+            <output name="bubbles" file="empty_file.fasta" />
+            <output name="indels" file="empty_file.fasta" />
+            <output name="coverage_histogram_outfile" file="coverage2.hist" />
+            <output name="stats" file="abyss-stats2.tab" />
+        </test>
+    </tests>
     <help>
 **What it does**
 
-ABySS is a de novo, paired-end sequence assembler that is designed for short reads.
+`ABySS`_ is a de novo sequence assembler intended for short paired-end reads and large genomes.
 
 **Input**
 
-The suffix of the read identifier for a pair of reads must be one of '1' and '2', or 'A' and 'B', or 'F' and 'R', or 'F3' and 'R3', or 'forward' and 'reverse'. The reads may be interleaved in the same file or found in different files; however, interleaved mates will use less memory.
+Input files should be FASTA or FASTQ files.
+
+For paired reads, a pair of reads must be named with the suffixes /1 and /2 to identify the first and second read, or the reads may be named identically. The paired reads may be in separate files or interleaved in a single file.
+
+At least a paired-end library or a single-end library must be provided.
+
+Contigs are generated only if at least a paired-end library is provided. Scaffolds are generated only if at least a paired-end library or a mate-pair library is provided.
 
 **Description**
 
-This tool performs the following commands:
+This tool performs the complete ABySS pipeline using abyss-pe, described in https://github.com/bcgsc/abyss/blob/master/doc/flowchart.pdf .
+
+**License and citation**
+
+This Galaxy tool is Copyright © 2015 `The Genome Analysis Centre (TGAC)`_ and is released under the `MIT license`_.
+
+.. _The Genome Analysis Centre (TGAC): http://www.tgac.ac.uk/
+.. _MIT license: http://opensource.org/licenses/MIT
 
-ABYSS - the single-end assembler
-AdjList - finds overlaps of length k-1 between contigs
-KAligner** - aligns reads to contigs
-ParseAligns** - finds pairs of reads in alignments
-DistanceEst** - estimates distances between contigs
-Overlap - find overlaps between blunt contigs
-SimpleGraph - finds paths between pairs of contigs
-MergePaths - merges consistent paths
-Consensus - for a colour-space assembly, convert the colour-space contigs to nucleotide contigs
+You can use this tool only if you agree to the license terms of: `ABySS`_.
 
-**Reference**
-
-http://www.bcgsc.ca/platform/bioinfo/software/abyss
+.. _ABySS: http://www.bcgsc.ca/platform/bioinfo/software/abyss
     </help>
-    <expand macro="citations" />
+    <citations>
+        <citation type="doi">10.1101/gr.089532.108</citation>
+    </citations>
 </tool>