annotate velveth.xml @ 2:e06e0d3aca0a default tip

Fix cheetah code for exp_cov
author Jim Johnson <jj@umn.edu>
date Thu, 29 Aug 2013 06:38:40 -0500
parents 67d18741a0e7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
1 <tool id="velveth" name="velveth" version="1.0.1">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
2 <description>Prepare a dataset for the Velvet velvetg Assembler</description>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
3 <requirements>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
4 <requirement type="package" version="1.2.10">velvet</requirement>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
5 </requirements>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
6 <macros>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
7 <macro name="categories">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
8 <param name="category" type="select" label="">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
9 <option value="" selected="true"></option>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
10 <option value="2">2</option>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
11 <option value="3">3</option>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
12 <option value="4">4</option>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
13 </param>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
14 </macro>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
15 <macro name="paired_reads">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
16 <conditional name="files">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
17 <param name="separate" type="boolean" checked="false" truevalue="separate" falsevalue="interleaved" label="Are the reads paired and in two different files?" help="Tick this box if this is a paired library with forward and reverse read files."/>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
18 <when value="interleaved">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
19 <param name="forwards_file" type="data" format="fasta,fastq,raw,sam,bam" label="Read dataset"/>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
20 </when>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
21 <when value="separate">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
22 <param name="forwards_file" type="data" format="fasta,fastq,raw,sam,bam" label="Read dataset for direction 1"/>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
23 <param name="reverse_file" type="data" format="fasta,fastq,raw,sam,bam" label="Read dataset for direction 2"/>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
24 </when>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
25 </conditional>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
26 </macro>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
27 </macros>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
28 <command interpreter="python">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
29 velveth_wrapper.py '$out_file1' '$out_file1.files_path' $hash_length $strand_specific
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
30 #for i in $inputs:
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
31 #if $i.reads.readtype == 'short':
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
32 -short${i.reads.category}
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
33 -#if str($i.reads.forwards_file.ext).startswith('fast') then $i.reads.forwards_file.ext[:5] else $i.reads.forwards_file.ext#
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
34 $i.reads.forwards_file.file_name
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
35 #elif $i.reads.readtype == 'shortPaired':
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
36 -shortPaired${i.reads.category}
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
37 -#if str($i.reads.files.forwards_file.ext).startswith('fast') then $i.reads.files.forwards_file.ext[:5] else $i.reads.files.forwards_file.ext#
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
38 #if $i.reads.files.separate:
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
39 -separate
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
40 $i.reads.files.forwards_file.__str__
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
41 $i.reads.files.reverse_file.__str__
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
42 #else
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
43 $i.reads.files.forwards_file.__str__
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
44 #end if
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
45 #elif $i.reads.readtype == 'long':
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
46 -long
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
47 -#if str($i.reads.forwards_file.ext).startswith('fast') then $i.reads.forwards_file.ext[:5] else $i.reads.forwards_file.ext#
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
48 $i.reads.forwards_file.__str__
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
49 #elif $i.reads.readtype == 'longPaired':
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
50 -longPaired
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
51 -#if str($i.reads.files.forwards_file.ext).startswith('fast') then $i.reads.files.forwards_file.ext[:5] else $i.reads.files.forwards_file.ext#
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
52 #if $i.reads.files.separate:
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
53 -separate
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
54 $i.reads.files.forwards_file.__str__
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
55 $i.reads.files.reverse_file.__str__
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
56 #else
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
57 $i.reads.files.forwards_file.__str__
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
58 #end if
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
59 #elif $i.reads.readtype == 'reference':
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
60 -reference
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
61 -#if str($i.reads.forwards_file.ext).startswith('fast') then $i.reads.forwards_file.ext[:5] else $i.reads.forwards_file.ext#
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
62 $i.reads.forwards_file.__str__
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
63 #end if
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
64 #end for
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
65 </command>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
66 <inputs>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
67 <param name="hash_length" type="integer" value="19" label="Hash Length" help="k-mer length in base pairs of the words being hashed.">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
68 <validator type="in_range" min="7" max="191"/>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
69 <validator type="expression" message="Must be an odd number">(int(value) % 2 == 1)</validator>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
70 </param>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
71
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
72 <param name="strand_specific" type="boolean" checked="false" truevalue="-strand_specific" falsevalue="" label="Use strand specific transcriptome sequencing" help="If you are using a strand specific transcriptome sequencing protocol, you may wish to use this option for better results."/>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
73
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
74 <!-- Read files section. No need for zipped or auto-format options, as the data has already been imported in a readable format. -->
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
75 <repeat name="inputs" min="1" title="Input read libraries">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
76 <conditional name="reads">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
77 <param name="readtype" type="select" label="Read type">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
78 <option value="short" selected="yes">short</option>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
79 <option value="shortPaired">shortPaired</option>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
80 <option value="long">long</option>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
81 <option value="longPaired">longPaired</option>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
82 <option value="reference">reference</option>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
83 </param>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
84 <when value="short">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
85 <expand macro="categories" />
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
86 <param name="forwards_file" type="data" format="fasta,fastq,raw,sam,bam" label="Read dataset"/>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
87 </when>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
88 <when value="shortPaired">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
89 <expand macro="categories" />
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
90 <expand macro="paired_reads" />
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
91 </when>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
92 <when value="long">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
93 <param name="forwards_file" type="data" format="fasta,fastq,raw,sam,bam" label="Read dataset"/>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
94 </when>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
95 <when value="longPaired">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
96 <expand macro="paired_reads" />
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
97 </when>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
98 <when value="reference">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
99 <!-- The command template reads $i.reads.forwards_file for the 'reference' read type, so this branch must declare that dataset parameter. --> <param name="forwards_file" type="data" format="fasta,fastq,raw,sam,bam" label="Reference dataset"/> </when>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
100 </conditional>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
101 </repeat>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
102 </inputs>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
103 <outputs>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
104 <data format="velvet" name="out_file1" />
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
105 </outputs>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
106 <tests>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
107 <test>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
108 <param name="hash_length" value="21" />
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
109 <param name="read_type" value="-shortPaired" />
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
110 <!-- <repeat name="inputs"> -->
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
111 <param name="file_format" value="fasta" />
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
112 <param name="read_type" value="shortPaired reads" />
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
113 <param name="input" value="velvet_test_reads.fa" ftype="fasta" />
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
114 <!-- </repeat> -->
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
115 <param name="strand_specific" value="" />
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
116 <output name="out_file1" file="velveth_test1/output.html" lines_diff="4">
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
117 <extra_files type="file" name='Sequences' value="velveth_test1/Sequences" compare="diff" />
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
118 <extra_files type="file" name='Roadmaps' value="velveth_test1/Roadmaps" compare="diff" />
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
119 </output>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
120 </test>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
121 </tests>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
122 <help>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
123 **Velvet Overview**
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
124
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
125 Velvet_ is a de novo genomic assembler specially designed for short read sequencing technologies, such as Solexa or 454, developed by Daniel Zerbino and Ewan Birney at the European Bioinformatics Institute (EMBL-EBI), near Cambridge, in the United Kingdom.
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
126
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
127 Velvet currently takes in short read sequences, removes errors then produces high quality unique contigs. It then uses paired-end read and long read information, when available, to retrieve the repeated areas between contigs.
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
128
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
129 Read the Velvet `documentation`__ for details on using the Velvet Assembler.
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
130
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
131 .. _Velvet: http://www.ebi.ac.uk/~zerbino/velvet/
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
132
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
133 .. __: http://www.ebi.ac.uk/~zerbino/velvet/Manual.pdf
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
134
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
135 ------
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
136
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
137 **Velveth**
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
138
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
139 Velveth takes in a number of sequence files, produces a hashtable, then outputs two files in an output directory (creating it if necessary), Sequences and Roadmaps, which are necessary to velvetg.
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
140
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
141 ------
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
142
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
143 **Hash Length**
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
144
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
145 The hash length, also known as k-mer length, corresponds to the length, in base pairs, of the words being hashed.
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
146
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
147 The hash length is the length of the k-mers being entered in the hash table. Firstly, you must observe three technical constraints::
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
148
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
149 # it must be an odd number, to avoid palindromes. If you put in an even number, Velvet will just decrement it and proceed.
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
150 # it must be below or equal to the compile-time MAXKMERLENGTH (cf. section 2.3.3 of the Velvet manual; by default 31bp), because it is stored on 64 bits
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
151 # it must be strictly shorter than the read length, otherwise you simply will not observe any overlaps between reads, for obvious reasons.
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
152
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
153 Now you still have quite a lot of possibilities. As is often the case, it's a trade-off between specificity and sensitivity. Longer k-mers bring you more specificity (i.e. fewer spurious overlaps) but lower coverage (cf. below), so there's a sweet spot to be found with time and experience.
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
154 We like to think in terms of "k-mer coverage", i.e. how many times a k-mer has been seen among the reads. The relation between k-mer coverage Ck and standard (nucleotide-wise) coverage C is Ck = C * (L - k + 1) / L, where k is your hash length and L is your read length.
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
155 Experience shows that this kmer coverage should be above 10 to start getting decent results. If Ck is above 20, you might be "wasting" coverage. Experience also shows that empirical tests with different values for k are not that costly to run!
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
156
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
157 **Input Files**
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
158
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
159 Velvet works mainly with fasta and fastq formats. For paired-end reads, the assumption is that each read is next to its mate
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
160 read. In other words, if the reads are indexed from 0, then reads 0 and 1 are paired, 2 and 3, 4 and 5, etc.
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
161
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
162 Supported file formats are::
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
163
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
164 fasta
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
165 fastq
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
166 fasta.gz
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
167 fastq.gz
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
168 eland
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
169 gerald
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
170
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
171 Read categories are::
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
172
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
173 short (default)
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
174 shortPaired
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
175 short2 (same as short, but for a separate insert-size library)
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
176 shortPaired2 (see above)
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
177 long (for Sanger, 454 or even reference sequences)
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
178 longPaired
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
179
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
180 </help>
67d18741a0e7 Uploaded
jjohnson
parents:
diff changeset
181 </tool>