comparison stacks_refmap.xml @ 0:457e01b31aa8 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks commit e1c1550e0bd61c88ffead2b1c4f6ab7393052393
author iuc
date Sat, 25 Jun 2016 17:26:40 -0400
parents
children 6fb6281a836f
comparison
equal deleted inserted replaced
-1:000000000000 0:457e01b31aa8
1 <tool id="stacks_refmap" name="Stacks: reference map" version="@WRAPPER_VERSION@.1">
2 <description>the Stacks pipeline with a reference genome (ref_map.pl)</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="stdio"/>
8 <command><![CDATA[
9 #from os.path import splitext
10 #import re
11 ## Stage the inputs: each alignment dataset is symlinked to a relative path built from its element identifier (extension dropped, a trailing .1 removed, then .sam or .bam appended according to the dataset ext)
12 #if str( $options_usage.rad_analysis_type ) == "genetic":
13 #for $input_parent in $options_usage.parent_alignments:
14 #if $input_parent.ext == "sam":
15 #set $data_path = splitext($input_parent.element_identifier)[0]
16 #set $data_path = re.sub(r'\.1$', '', $data_path)
17 #set $data_path = $data_path + ".sam"
18 #else:
19 #set $data_path = splitext($input_parent.element_identifier)[0]
20 #set $data_path = re.sub(r'\.1$', '', $data_path)
21 #set $data_path = $data_path + ".bam"
22 #end if
23 ## Link the parent dataset to the name computed above
24 ln -s "${input_parent}" "${data_path}" &&
25 #end for
26
27 #for $input_progeny in $options_usage.progeny_alignments:
28 ## progeny_alignments is declared optional, so entries that evaluate false are skipped
29 #if $input_progeny:
30 #if $input_progeny.ext == "sam":
31 #set $data_path = splitext($input_progeny.element_identifier)[0]
32 #set $data_path = re.sub(r'\.1$', '', $data_path)
33 #set $data_path = $data_path + ".sam"
34 #else:
35 #set $data_path = splitext($input_progeny.element_identifier)[0]
36 #set $data_path = re.sub(r'\.1$', '', $data_path)
37 #set $data_path = $data_path + ".bam"
38 #end if
39
40 ln -s "${input_progeny}" "${data_path}" &&
41 #end if
42 #end for
43 #else:
44 #for $input_indiv in $options_usage.individual_sample:
45 ## Population usage: the same naming rules are applied to every individual sample
46 #if $input_indiv.ext == "sam":
47 #set $data_path = splitext($input_indiv.element_identifier)[0]
48 #set $data_path = re.sub(r'\.1$', '', $data_path)
49 #set $data_path = $data_path + ".sam"
50 #else:
51 #set $data_path = splitext($input_indiv.element_identifier)[0]
52 #set $data_path = re.sub(r'\.1$', '', $data_path)
53 #set $data_path = $data_path + ".bam"
54 #end if
55
56 ln -s "${input_indiv}" "${data_path}" &&
57 #end for
58 #end if
59 ## Output directory given to ref_map.pl with -o further down
60 mkdir stacks_outputs
61
62 &&
63
64 ref_map.pl
65 ## Thread count comes from GALAXY_SLOTS, defaulting to 1 when it is unset or empty
66 -T \${GALAXY_SLOTS:-1}
67 ## The staged names are recomputed with the same rules as the symlinks above and passed as -p, -r or -s
68 #if str( $options_usage.rad_analysis_type ) == "genetic":
69 #for $input_parent in $options_usage.parent_alignments:
70 #if $input_parent.ext == "sam":
71 #set $data_path = splitext($input_parent.element_identifier)[0]
72 #set $data_path = re.sub(r'\.1$', '', $data_path)
73 #set $data_path = $data_path + ".sam"
74 #else:
75 #set $data_path = splitext($input_parent.element_identifier)[0]
76 #set $data_path = re.sub(r'\.1$', '', $data_path)
77 #set $data_path = $data_path + ".bam"
78 #end if
79
80 -p "${data_path}"
81 #end for
82
83 -A $options_usage.cross_type
84
85 #for $input_progeny in $options_usage.progeny_alignments:
86 #if $input_progeny:
87 #if $input_progeny.ext == "sam":
88 #set $data_path = splitext($input_progeny.element_identifier)[0]
89 #set $data_path = re.sub(r'\.1$', '', $data_path)
90 #set $data_path = $data_path + ".sam"
91 #else:
92 #set $data_path = splitext($input_progeny.element_identifier)[0]
93 #set $data_path = re.sub(r'\.1$', '', $data_path)
94 #set $data_path = $data_path + ".bam"
95 #end if
96
97 -r "${data_path}"
98 #end if
99 #end for
100 #else:
101 #for $input_indiv in $options_usage.individual_sample:
102
103 #if $input_indiv.ext == "sam":
104 #set $data_path = splitext($input_indiv.element_identifier)[0]
105 #set $data_path = re.sub(r'\.1$', '', $data_path)
106 #set $data_path = $data_path + ".sam"
107 #else:
108 #set $data_path = splitext($input_indiv.element_identifier)[0]
109 #set $data_path = re.sub(r'\.1$', '', $data_path)
110 #set $data_path = $data_path + ".bam"
111 #end if
112
113 -s "${data_path}"
114 #end for
115 -O "$options_usage.popmap"
116 #end if
117 ## Depth options are only emitted when the parameter renders to a non-empty string
118 #if str($m):
119 -m $m
120 #end if
121 #if str($P):
122 -P $P
123 #end if
124
125 ## Batch description (the outputs section reads files named batch_1.*)
126 -b 1
127
128 ## No SQL recording
129 -S
130
131 ## snp_model
132 #if str( $snp_options.select_model.model_type) == "bounded":
133 --bound_low $snp_options.select_model.bound_low
134 --bound_high $snp_options.select_model.bound_high
135 --alpha $snp_options.select_model.alpha
136 #else if str( $snp_options.select_model.model_type) == "snp":
137 --alpha $snp_options.select_model.alpha
138 #end if
139
140 -o stacks_outputs
141 ## Genetic usage only: the NORM_GENOTYPES_OUTPUT_LIGHT token is substituted here (its definition is not in this file, presumably macros.xml)
142 #if str( $options_usage.rad_analysis_type ) == "genetic":
143 @NORM_GENOTYPES_OUTPUT_LIGHT@
144 #end if
145
146 ## If input is in bam format, stacks will output gzipped files (no option to control this)
147 && if ls stacks_outputs/*.gz > /dev/null 2>&1; then gunzip stacks_outputs/*.gz; fi
148 ]]></command>
149
150 <inputs> <!-- Parameters rendered into the ref_map.pl command line; the options_usage conditional switches between the genetic map and population parameter sets -->
151 <conditional name="options_usage">
152 <param name="rad_analysis_type" type="select" label="Select your usage">
153 <option value="genetic" selected="true">Genetic map</option>
154 <option value="population">Population</option>
155 </param>
156 <when value="genetic">
157 <param name="parent_alignments" format="sam,bam" type="data" multiple="true" label="Files containing parent alignments" argument="-p" help="Dataset names will be used as sample name (no space allowed)." />
158 <param name="progeny_alignments" format="sam,bam" type="data" multiple="true" optional="true" label="Files containing progeny alignments" argument="-r" help="Dataset names will be used as sample name (no space allowed)." />
159 <!-- Cross type, passed to ref_map.pl as -A; the option list comes from the cross_types macro -->
160 <param name="cross_type" argument="-A" type="select" label="Cross type">
161 <expand macro="cross_types"/>
162 </param>
163 </when>
164 <when value="population">
165 <param name="individual_sample" format="sam,bam" type="data" multiple="true" label="Files containing an individual sample from a population" argument="-s" help="Dataset names will be used as sample name (no space allowed)." />
166 <param name="popmap" type="data" format="tabular,txt" label="Specify a population map" argument="-O" />
167 </when>
168 </conditional>
169 <!-- Depth thresholds available in both usages; the command template emits each flag only when its value is non-empty -->
170 <param name="m" type="integer" value="3" label="Minimum depth of coverage" help="specify the minimum depth of coverage to report a stack in pstacks" argument="-m" />
171 <param name="P" type="integer" value="" optional="true" label="Minimum depth of coverage in 'progeny' individuals" help="specify the minimum depth of coverage to report a stack in pstacks for 'progeny' individuals" argument="-P" />
172
173 <!-- SNP model options; the parameters themselves are supplied by the snp_options macro -->
174 <section name="snp_options" title="SNP_Model_Options" expanded="False">
175 <expand macro="snp_options"/>
176 </section>
177 </inputs>
178 <outputs> <!-- All results are collected from the stacks_outputs directory created by the command -->
179 <data format="txt" name="output_log" label="ref_map.log with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/ref_map.log" />
180 <!-- Catalog files, taken from the batch_1.catalog.* files in stacks_outputs -->
181 <data format="tabular" name="catalogtags" label="Catalog assembled loci (tags) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.tags.tsv" />
182 <data format="tabular" name="catalogsnps" label="Catalog model calls (snps) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.snps.tsv" />
183 <data format="tabular" name="catalogalleles" label="Catalog haplotypes (alleles) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.alleles.tsv" />
184 <!-- Genotypes and populations outputs are expanded from macros whose definitions are not in this file -->
185 <expand macro="genotypes_output_light"/>
186 <expand macro="populations_output_light"/>
187 <!-- List collections discovered by file suffix; each element is named after the file without its .tsv extension -->
188 <collection name="tags" type="list" label="Assembled loci (tags) from ${on_string}">
189 <discover_datasets pattern="(?P&lt;name&gt;.+\.tags)\.tsv$" ext="tabular" directory="stacks_outputs" />
190 </collection>
191
192 <collection name="snps" type="list" label="Model calls (snps) from each locus on ${on_string}">
193 <discover_datasets pattern="(?P&lt;name&gt;.+\.snps)\.tsv$" ext="tabular" directory="stacks_outputs" />
194 </collection>
195
196 <collection name="alleles" type="list" label="Haplotypes (alleles) recorded from each locus on ${on_string}">
197 <discover_datasets pattern="(?P&lt;name&gt;.+\.alleles)\.tsv$" ext="tabular" directory="stacks_outputs" />
198 </collection>
199
200 <collection name="matches" type="list" label="Matches to the catalog on ${on_string}">
201 <discover_datasets pattern="(?P&lt;name&gt;.+\.matches)\.tsv$" ext="tabular" directory="stacks_outputs" />
202 </collection>
203 <!-- Single list gathering the four suffixes above plus the haplotypes, genotypes, markers, hapstats, sumstats and sumstats_summary tables and the genotypes .loc/.txt files -->
204 <collection name="all_output" type="list" label="Full output from ref_map on ${on_string}">
205 <discover_datasets pattern="(?P&lt;name&gt;.+\.(tags|snps|alleles|matches))\.tsv$" ext="tabular" directory="stacks_outputs" />
206 <discover_datasets pattern="(?P&lt;name&gt;.+\.(haplotypes|genotypes|markers|hapstats|sumstats|sumstats_summary))\.tsv$" ext="tabular" directory="stacks_outputs" />
207 <discover_datasets pattern="(?P&lt;name&gt;.+\.(genotypes))\.(loc|txt)$" ext="txt" directory="stacks_outputs" />
208 </collection>
209 </outputs>
210
211 <tests>
212 <test> <!-- Genetic map usage with one parent BAM and no progeny alignments -->
213 <param name="options_usage|rad_analysis_type" value="genetic"/>
214 <param name="options_usage|parent_alignments" value="refmap/PopA_01.bam" />
215 <output name="output_log">
216 <assert_contents>
217 <has_text text="ref_map.pl completed" />
218 </assert_contents>
219 </output>
220
221 <!-- catalog: each catalog output must contain the text "catalog generated" -->
222 <output name="catalogsnps">
223 <assert_contents>
224 <has_text text="catalog generated" />
225 </assert_contents>
226 </output>
227 <output name="catalogalleles">
228 <assert_contents>
229 <has_text text="catalog generated" />
230 </assert_contents>
231 </output>
232 <output name="catalogtags">
233 <assert_contents>
234 <has_text text="catalog generated" />
235 </assert_contents>
236 </output>
237
238 <!-- genotypes: these output names are not declared in this file; presumably they come from the genotypes_output_light macro -->
239 <output name="out_generic_haplo">
240 <assert_contents>
241 <has_text text="Catalog ID" />
242 </assert_contents>
243 </output>
244 <output name="out_sql_markers">
245 <assert_contents>
246 <has_text text="Total Genotypes" />
247 </assert_contents>
248 </output>
249 <output name="out_joinmap">
250 <assert_contents>
251 <has_text text="batch_1.genotypes_" />
252 </assert_contents>
253 </output>
254 <output name="out_sql_genotypes">
255 <assert_contents>
256 <has_text text="SQL ID" />
257 </assert_contents>
258 </output>
259 <output name="out_generic_haplo">
260 <assert_contents>
261 <has_text text="Seg Dist" />
262 </assert_contents>
263 </output>
264 <output name="out_sql_markers">
265 <assert_contents>
266 <has_text text="Total Genotypes" />
267 </assert_contents>
268 </output>
269
270 <!-- samples: one element per collection is checked, named after the PopA_01 input plus the file suffix -->
271 <output_collection name="tags">
272 <element name="PopA_01.tags">
273 <assert_contents>
274 <has_text text="generated on " />
275 </assert_contents>
276 </element>
277 </output_collection>
278 <output_collection name="snps">
279 <element name="PopA_01.snps">
280 <assert_contents>
281 <has_text text="generated on " />
282 </assert_contents>
283 </element>
284 </output_collection>
285 <output_collection name="alleles">
286 <element name="PopA_01.alleles">
287 <assert_contents>
288 <has_text text="generated on " />
289 </assert_contents>
290 </element>
291 </output_collection>
292 <output_collection name="matches">
293 <element name="PopA_01.matches">
294 <assert_contents>
295 <has_text text="generated on " />
296 </assert_contents>
297 </element>
298 </output_collection>
299 </test>
300 <test> <!-- Genetic map usage with one parent BAM and one progeny BAM -->
301 <param name="options_usage|rad_analysis_type" value="genetic"/>
302 <param name="options_usage|parent_alignments" value="refmap/PopA_01.bam" />
303 <param name="options_usage|progeny_alignments" value="refmap/PopA_02.bam" />
304 <output name="output_log">
305 <assert_contents>
306 <has_text text="ref_map.pl completed" />
307 </assert_contents>
308 </output>
309
310 <!-- catalog: each catalog output must contain the text "catalog generated" -->
311 <output name="catalogsnps">
312 <assert_contents>
313 <has_text text="catalog generated" />
314 </assert_contents>
315 </output>
316 <output name="catalogalleles">
317 <assert_contents>
318 <has_text text="catalog generated" />
319 </assert_contents>
320 </output>
321 <output name="catalogtags">
322 <assert_contents>
323 <has_text text="catalog generated" />
324 </assert_contents>
325 </output>
326
327 <!-- genotypes: these output names are not declared in this file; presumably they come from the genotypes_output_light macro -->
328 <output name="out_generic_haplo">
329 <assert_contents>
330 <has_text text="Catalog ID" />
331 </assert_contents>
332 </output>
333 <output name="out_sql_markers">
334 <assert_contents>
335 <has_text text="Total Genotypes" />
336 </assert_contents>
337 </output>
338 <output name="out_joinmap">
339 <assert_contents>
340 <has_text text="batch_1.genotypes_" />
341 </assert_contents>
342 </output>
343 <output name="out_sql_genotypes">
344 <assert_contents>
345 <has_text text="SQL ID" />
346 </assert_contents>
347 </output>
348 <output name="out_generic_haplo">
349 <assert_contents>
350 <has_text text="Seg Dist" />
351 </assert_contents>
352 </output>
353 <output name="out_sql_markers">
354 <assert_contents>
355 <has_text text="Total Genotypes" />
356 </assert_contents>
357 </output>
358
359 <!-- samples: only the elements derived from the PopA_01 parent are checked in each collection -->
360 <output_collection name="tags">
361 <element name="PopA_01.tags">
362 <assert_contents>
363 <has_text text="generated on " />
364 </assert_contents>
365 </element>
366 </output_collection>
367 <output_collection name="snps">
368 <element name="PopA_01.snps">
369 <assert_contents>
370 <has_text text="generated on " />
371 </assert_contents>
372 </element>
373 </output_collection>
374 <output_collection name="alleles">
375 <element name="PopA_01.alleles">
376 <assert_contents>
377 <has_text text="generated on " />
378 </assert_contents>
379 </element>
380 </output_collection>
381 <output_collection name="matches">
382 <element name="PopA_01.matches">
383 <assert_contents>
384 <has_text text="generated on " />
385 </assert_contents>
386 </element>
387 </output_collection>
388 </test>
389 <test> <!-- Population usage with eight individual BAM files (PopA_01 to PopB_04) and a population map -->
390 <param name="options_usage|rad_analysis_type" value="population"/>
391 <param name="options_usage|individual_sample" value="refmap/PopA_01.bam,refmap/PopA_02.bam,refmap/PopA_03.bam,refmap/PopA_04.bam,refmap/PopB_01.bam,refmap/PopB_02.bam,refmap/PopB_03.bam,refmap/PopB_04.bam" />
392 <param name="options_usage|popmap" value="denovo_map/popmap.tsv" />
393 <output name="output_log">
394 <assert_contents>
395 <has_text text="ref_map.pl completed" />
396 </assert_contents>
397 </output>
398
399 <!-- catalog: each catalog output must contain the text "catalog generated on" -->
400 <output name="catalogtags">
401 <assert_contents>
402 <has_text text="catalog generated on" />
403 </assert_contents>
404 </output>
405 <output name="catalogsnps">
406 <assert_contents>
407 <has_text text="catalog generated on" />
408 </assert_contents>
409 </output>
410 <output name="catalogalleles">
411 <assert_contents>
412 <has_text text="catalog generated on" />
413 </assert_contents>
414 </output>
415
416 <!-- populations: these output names are not declared in this file; presumably they come from the populations_output_light macro -->
417 <output name="out_haplotypes">
418 <assert_contents>
419 <has_text text="PopA_01" />
420 </assert_contents>
421 </output>
422 <output name="out_hapstats">
423 <assert_contents>
424 <has_text text="Smoothed Gene Diversity" />
425 </assert_contents>
426 </output>
427 <output name="out_populations_log">
428 <assert_contents>
429 <has_text text="populations version" />
430 </assert_contents>
431 </output>
432 <output name="out_sumstats_sum">
433 <assert_contents>
434 <has_text text="Polymorphic Sites" />
435 </assert_contents>
436 </output>
437 <output name="out_sumstats">
438 <assert_contents>
439 <has_text text="Smoothed Pi" />
440 </assert_contents>
441 </output>
442
443 <!-- samples: only the PopA_01 element of each collection is checked -->
444 <output_collection name="tags">
445 <element name="PopA_01.tags">
446 <assert_contents>
447 <has_text text="generated on " />
448 </assert_contents>
449 </element>
450 </output_collection>
451 <output_collection name="snps">
452 <element name="PopA_01.snps">
453 <assert_contents>
454 <has_text text="generated on " />
455 </assert_contents>
456 </element>
457 </output_collection>
458 <output_collection name="alleles">
459 <element name="PopA_01.alleles">
460 <assert_contents>
461 <has_text text="generated on " />
462 </assert_contents>
463 </element>
464 </output_collection>
465 <output_collection name="matches">
466 <element name="PopA_01.matches">
467 <assert_contents>
468 <has_text text="generated on " />
469 </assert_contents>
470 </element>
471 </output_collection>
472 </test>
473 </tests>
474
475 <help>
476 <![CDATA[
477 .. class:: infomark
478
479 **What it does**
480
481 This program expects data that have been aligned to a reference genome, and can accept data directly from Bowtie, or from any aligner that can produce SAM format. To avoid dataset naming problems, we recommend the use of the *Map with BWA for STACKS tool*. This program will execute each of the Stacks components: first, running pstacks on each of the samples specified, building loci (based on the reference alignment) and calling SNPs in each. Second, cstacks will be run to create a catalog of all loci specified as 'parents' or 'samples' on the command line, again using alignment to match loci in the catalog. Finally, sstacks will be executed to match each sample against the catalog. The ref_map.pl program will also load the results of each stage of the analysis: individual loci, the catalog, and matches against the catalog into the database (although this can be disabled). After matching, the program will build a database index to speed up access (index_radtags.pl) and enable web-based filtering.
482
483 --------
484
485 **Input files**
486
487 - SAM, BAM
488
489 - Population map::
490
491 indv_01 1
492 indv_02 1
493 indv_03 1
494 indv_04 2
495 indv_05 2
496 indv_06 2
497
498 **Output files**
499
500 - XXX.tags.tsv file:
501
502 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
503
504 Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.
505
506
507 - XXX.snps.tsv file:
508
509 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
510
511 Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.
512
513
514 - XXX.alleles.tsv file:
515
516 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
517
518
519 - XXX.matches.tsv file:
520
521 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
522
523 Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample.
524
525
526 - other files:
527
528 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
529
530 @STACKS_INFOS@
531 ]]>
532 </help>
533 <expand macro="citation" />
534 </tool>