comparison split_libraries.xml @ 0:c1bd0c560018 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime commit bcbe76277f3e60303faf826f8ce7f018bc663a9a-dirty
author bebatut
date Tue, 02 Feb 2016 05:50:37 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c1bd0c560018
1 <tool id="qiime_split_libraries" name="Split libraries" version="1.9.1">
2 <description>according to barcodes specified in mapping file</description>
3
4 <macros> <!-- Imports shared macro definitions from macros.xml. -->
5 <import>macros.xml</import>
6 </macros>
7 <!-- Expands the "requirements" macro; presumably defined in the imported macros.xml (not visible here). -->
8 <expand macro="requirements" />
9 <!-- Runs the wrapped script with its version flag. -->
10 <version_command><![CDATA[
11 split_libraries.py --version
12 ]]></version_command>
13
14 <command><![CDATA[
15 split_libraries.py
16 -m $mapping_fp
17 -o split_libraries
18 ## Join every selected FASTA dataset into one comma-separated -f value.
19 #set $seq_files = ''
20 #set $sep = ''
21 #for $file in $input_files_fasta
22 #set $seq_files += $sep + str($file)
23 #set $sep = ','
24 #end for
25 -f $seq_files
26 ## Quality files are optional: build the -q list only when some were selected.
27 #if str($input_files_qual) != 'None':
28 #set $files = ''
29 #set $sep = ''
30 #for $file in $input_files_qual
31 #set $files += $sep + str($file)
32 #set $sep = ','
33 #end for
34 -q $files
35 #end if
36
37 -l $min_seq_length
38 -L $max_seq_length
39 $trim_seq_length
40 ## Boolean params expand to their flag text, or to nothing when not set.
41 $keep_primer
42 $keep_barcode
43
44 -a $max_ambig
45 -H $max_homopolymer
46 -M $max_primer_mismatch
47 ## -b takes either a named barcode type or, for "custom_length", a number.
48 #if str( $barcode_type.barcode_selector ) != "custom_length"
49 -b $barcode_type.barcode_selector
50 #else
51 -b $barcode_type.barcode_length
52 #end if
53
54 -e $max_barcode_errors
55 -n $start_numbering_at
56 $retain_unassigned_reads
57 $disable_bc_correction
58 ## Quality-score options are passed only together with quality files.
59 #if str($input_files_qual) != 'None':
60 -s $min_qual_score
61 -w $qual_score_window
62 $discard_bad_windows
63 $record_qual_scores
64 #end if
65
66 $disable_primers
67 $truncate_ambi_bases
68 $reverse_primers.reverse_primers_test
69 #if str($reverse_primers.reverse_primers_test) == '--reverse_primers':
70 --reverse_primer_mismatches $reverse_primers.reverse_primer_mismatches
71 #end if
72 ## Optional params: emit the option only when a value was entered.
73 #if str($median_length_filtering):
74 -i $median_length_filtering
75 #end if
76
77 #if str($added_demultiplex_field):
78 -j '$added_demultiplex_field'
79 #end if
80 ]]>
81 </command>
82
83 <inputs> <!-- User-facing parameters; boolean params carry their literal flag text in truevalue/falsevalue. -->
84 <param name="mapping_fp" label="Metadata mapping filepath" type="data"
85 format="tabular,txt,tsv,csv" help="The file must contain header
86 line indicating SampleID in the first column and BarcodeSequence in
87 the second, LinkerPrimerSequence in the third. It is recommended to
88 check the mapping file using the dedicated file (-m/--mapping_fp)"/>
89
90 <param name="input_files_fasta" type="data" format="fasta"
91 label="Input fasta files" multiple="True" help="(-f/--fasta)"/>
92
93 <param name="input_files_qual" type="data"
94 format="qual,qual454,qualillumina,qualsolexa,qualsolid"
95 label="Input quality files (optional)" multiple="True"
96 help="(-q/--qual)" optional="True"/>
97
98 <param name="min_seq_length" type="integer" value="200"
99 label="Minimum sequence length" help="(-l/--min_seq_length)"/>
100
101 <param name="max_seq_length" type="integer" value="1000"
102 label="Maximum sequence length" help="(-L/--max_seq_length)"/>
103
104 <param name="trim_seq_length" type="boolean" label="Compute sequence
105 lengths after trimming primers and barcodes?" truevalue="-t" falsevalue=""
106 checked="False" help="(-t/--trim_seq_length)" />
107
108 <param name="min_qual_score" type="integer" value="25"
109 label="Minimum average quality score allowed in read"
110 help="(-s/--min_qual_score)"/>
111 <!-- The next two flags are inverted: checked (the default) emits nothing, unchecked emits the keep option. -->
112 <param name="keep_primer" type="boolean" label="Remove primer from
113 sequences?" truevalue="" falsevalue="--keep_primer"
114 checked="True" help="(-k/--keep_primer)" />
115
116 <param name="keep_barcode" type="boolean" label="Remove barcode from
117 sequences?" truevalue="" falsevalue="--keep_barcode"
118 checked="True" help="(-B/--keep_barcode)" />
119
120 <param name="max_ambig" type="integer" value="6"
121 label="Maximum number of ambiguous bases" help="(-a/--max_ambig)"/>
122
123 <param name="max_homopolymer" type="integer" value="6"
124 label="Maximum length of homopolymer run" help="(-H/--max_homopolymer)"/>
125
126 <param name="max_primer_mismatch" type="integer" value="0"
127 label="Maximum number of primer mismatch" help="(-M/--max_primer_mismatch)"/>
128 <!-- The selected value is passed as the barcode type, except custom_length, where the integer length is passed instead. -->
129 <conditional name="barcode_type">
130 <param name="barcode_selector" type="select" label="Type of barcode"
131 help="(-b/ --barcode_type)">
132 <option value="hamming_8">hamming_8</option>
133 <option value="golay_12" selected="true">golay_12</option>
134 <option value="variable_length">variable_length (disable any barcode correction)</option>
135 <option value="custom_length">Custom length</option>
136 </param>
137 <when value="hamming_8" />
138 <when value="golay_12" />
139 <when value="variable_length" />
140 <when value="custom_length">
141 <param name="barcode_length" type="integer" value="4"
142 label="Barcode length"/>
143 </when>
144 </conditional>
145
146 <param name="max_barcode_errors" type="float" value="1.5"
147 label="Maximum number of errors in barcode"
148 help="(-e/--max_barcode_errors)"/>
149
150 <param name="start_numbering_at" type="integer" value="1"
151 label="Sequence id to use for the first sequence"
152 help="(-n/--start_numbering_at)"/>
153
154 <param name="retain_unassigned_reads" type="boolean" label="Retain
155 sequences which are Unassigned in the output sequence file?"
156 truevalue="--retain_unassigned_reads" falsevalue=""
157 checked="False" help="(--retain_unassigned_reads)" />
158
164 <param name="disable_bc_correction" type="boolean" label="Disable attempts
165 to find nearest corrected barcode?"
166 truevalue="--disable_bc_correction" falsevalue=""
167 checked="False" help="It can improve performance.
168 (-c/--disable_bc_correction)" />
169
170 <param name="qual_score_window" type="integer" value="0"
171 label="Size of the sliding window" help="If the average score of a
172 continuous set of w nucleotides falls below the threshold, the sequence
173 is discarded. A good value would be 50. 0 (zero) means no filtering.
174 Must pass a .qual file (see -q parameter) if this functionality is
175 enabled. Default behavior for this function is to truncate the sequence
176 at the beginning of the poor quality window, and test for minimal
177 length (-l parameter) of the resulting sequence (-w/--qual_score_window)"/>
178
179 <param name="discard_bad_windows" type="boolean" label="Discard any
180 sequences where a bad window is found?"
181 truevalue="--discard_bad_windows" falsevalue=""
182 checked="False" help="It will work if the sliding window length is bigger
183 than 0 (-g/--discard_bad_windows)" />
184
185 <param name="disable_primers" type="boolean" label="Disable primer usage
186 when demultiplexing?" truevalue="--disable_primers" falsevalue=""
187 checked="False" help="It should be enabled for unusual circumstances,
188 such as analyzing Sanger sequence data generated with different primers
189 (-p/--disable_primers)" />
190 <!-- The select value doubles as the flag text: empty for No, the reverse_primers option for Yes. -->
191 <conditional name="reverse_primers">
192 <param name="reverse_primers_test" type="select" label="Enable removal
193 of the reverse primer and any subsequent sequence from the end
194 of each read?" help="(-z/--reverse_primers)" >
195 <option value="--reverse_primers">Yes</option>
196 <option value="" selected="true">No</option>
197 </param>
198 <when value="" />
199 <when value="--reverse_primers" >
200 <param name="reverse_primer_mismatches" type="integer" value="0"
201 label="Number of allowed mismatches for reverse primers"
202 help="(--reverse_primer_mismatches)"/>
203 </when>
204 </conditional>
205
206 <param name="record_qual_scores" type="boolean" label="Record quality
207 scores for all sequences that are recorded?" truevalue="--record_qual_scores"
208 falsevalue="" checked="False" help="If this option is enabled, a file
209 named seqs_filtered.qual will be created in the output directory, and
210 will contain the same sequence IDs in the seqs.fna file and sequence
211 quality scores matching the bases present in the seqs.fna file
212 (-d/--record_qual_scores)" />
213
214 <param name="median_length_filtering" type="integer"
215 label="Median length filtering (optional)" help="It disables minimum
216 and maximum sequence length filtering, and instead calculates the median
217 sequence length and filters the sequences based upon the number of median
218 absolute deviations specified by this parameter. Any sequences with
219 lengths outside the number of deviations will be removed
220 (-i/--median_length_filtering)"
221 optional="True"/>
222
223 <param name="added_demultiplex_field" type="text" label="Field
224 to use in the mapping file as additional demultiplexing (optional)"
225 help="It can be used with or without barcodes. All combinations of
226 barcodes/primers and these fields must be unique. The fields must contain
227 values that can be parsed from the fasta labels such as 'plate=R_2008_12_09'.
228 In this case, 'plate' would be the column header and 'R_2008_12_09'
229 would be the field data (minus quotes) in the mapping file.
230 To use the run prefix from the fasta label, such as 'FLP3FBN01ELBSX',
231 where 'FLP3FBN01' is generated from the run ID, use 'run_prefix' and
232 set the run prefix to be used as the data under the column header
233 'run_prefix' (-j/--added_demultiplex_field)" optional="True"/>
234
235 <param name="truncate_ambi_bases" type="boolean" label="Enable to truncate
236 at the first N character encountered in the sequences?"
237 truevalue="--truncate_ambi_bases" falsevalue="" checked="False"
238 help="This will disable testing for ambiguous bases
239 (-x/--truncate_ambi_bases)"/>
240 </inputs>
241
242 <outputs> <!-- Each output is collected from a fixed file name inside the split_libraries output directory. -->
243 <data name="sequences" format="fasta"
244 from_work_dir="split_libraries/seqs.fna"
245 label="${tool.name} on ${on_string}: sequences"/>
246
247 <data name="log" format="txt"
248 from_work_dir="split_libraries/split_library_log.txt"
249 label="${tool.name} on ${on_string}: log"/>
250
251 <data name="histograms" format="txt"
252 from_work_dir="split_libraries/histograms.txt"
253 label="${tool.name} on ${on_string}: histograms"/>
254 <!-- Created only when the record_qual_scores option is enabled. -->
255 <data name="quality" format="qual"
256 from_work_dir="split_libraries/seqs_filtered.qual"
257 label="${tool.name} on ${on_string}: quality">
258 <filter>record_qual_scores is True</filter>
259 </data>
260 </outputs>
261
262 <tests>
263 <test> <!-- NOTE(review): this test case is empty; it sets no params and checks no outputs. -->
264 </test>
265 </tests>
266
267 <help><![CDATA[
268
269 **What it does**
270
271 This tool splits libraries according to the barcodes specified in the mapping file.
272
273 Since newer sequencing technologies provide many reads per run (e.g. the 454 GS FLX Titanium series can produce 400-600 million base pairs with 400-500 base pair read lengths) researchers are now finding it useful to combine multiple samples into a single 454 run. This multiplexing is achieved through the application of a pyrosequencing-tailored nucleotide barcode design (described in Parameswaran et al., 2007). By assigning individual, unique sample specific barcodes, multiple sequencing runs may be performed in parallel and the resulting reads can later be binned according to sample. The script split_libraries.py performs this task, in addition to several quality filtering steps including user defined cut-offs for: sequence lengths; end-trimming; minimum quality score. To summarize, by using the fasta, mapping, and quality files, split_libraries.py will parse sequences that meet user defined quality thresholds and then rename each read with the appropriate Sample ID, thus formatting the sequence data for downstream analysis. If a combination of different sequencing technologies is used in any particular study, split_libraries.py can be used to perform the quality-filtering for each library individually and the output may then be combined.
274
275 Sequences from samples that are not found in the mapping file (no corresponding barcode) and sequences without the correct primer sequence will be excluded. Additional scripts can be used to exclude sequences that match a given reference sequence (e.g. the human genome; exclude_seqs_by_blast.py) and/or sequences that are flagged as chimeras (identify_chimeric_seqs.py).
276
277 More information about this tool is available on
278 `QIIME documentation <http://qiime.org/scripts/split_libraries.html>`_.
279 ]]>
280 </help>
281
282 <citations> <!-- Filled in by expanding the "citations" macro; presumably defined in the imported macros.xml. -->
283 <expand macro="citations" />
284 </citations>
285 </tool>