comparison stacks_genotypes.xml @ 0:5d5cb1a9e4eb draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks commit e1c1550e0bd61c88ffead2b1c4f6ab7393052393
author iuc
date Sat, 25 Jun 2016 17:26:27 -0400
parents
children 62d80d00724e
comparison
equal deleted inserted replaced
-1:000000000000 0:5d5cb1a9e4eb
1 <tool id="stacks_genotypes" name="Stacks: genotypes" version="@WRAPPER_VERSION@.1">
2 <description>analyse haplotypes or genotypes in a genetic cross ('genotypes' program)</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="stdio"/>
8 <command><![CDATA[
9 #import re
10 ## Stage the Stacks result files in a scratch directory, then run genotypes on that directory.
11 mkdir stacks_outputs
12
13 &&
14 ## Link each tags/snps/alleles/matches element into stacks_outputs (a .tsv suffix is added when missing). Single quotes stop the shell from expanding metacharacters in user-supplied element identifiers.
15 #for $input_file in $input_col:
16 #set $filename = str($input_file.element_identifier)
17 #if not $filename.endswith('.tsv'):
18 #set $filename = $filename + ".tsv"
19 #end if
20 #if re.search('\.(tags|snps|alleles|matches)(\.tsv)?$', $filename):
21 ln -s '${input_file}' 'stacks_outputs/${filename}' &&
22 #end if
23 #end for
24
25 genotypes
26
27 -P stacks_outputs
28 -b $advanced_options.batchid
29
30 -t $options_usage.cross_type
31 -o $options_usage.map_out.map_out_type
32 ## The restriction enzyme is only passed for the genomic output type.
33 #if str( $options_usage.map_out.map_out_type ) == "genomic":
34 -e ${options_usage.map_out.enzyme}
35 #end if
36 ## Optional numeric filters: each flag is emitted only when its field is non-empty.
37 #if str($advanced_options.minprogeny):
38 -r $advanced_options.minprogeny
39 #end if
40
41 #if str($advanced_options.mindepth):
42 -m $advanced_options.mindepth
43 #end if
44
45 #if str($advanced_options.lnl):
46 --lnl_lim $advanced_options.lnl
47 #end if
48 ## Optional marker blacklist and whitelist datasets.
49 #if $advanced_options.blacklist:
50 -B '$advanced_options.blacklist'
51 #end if
52 #if $advanced_options.whitelist:
53 -W '$advanced_options.whitelist'
54 #end if
55 ## Manual corrections file; the flag is spelled cor_path (single r) to match the argument declared on the manual_cor parameter.
56 #if $advanced_options.manual_cor:
57 --cor_path '$advanced_options.manual_cor'
58 #end if
59 ## Automated corrections; the three thresholds exist only when the corrections checkbox is enabled.
60 #if $options_autocorr.corrections:
61 -c
62 --min_hom_seqs $options_autocorr.hom
63 --min_het_seqs $options_autocorr.het
64 --max_het_seqs $options_autocorr.hetmax
65 #end if
66
67 ## output SQL file (as denovo/refmap)
68 -s
69 ## Trailing snippet substituted from a macro token; its definition is not in this file (presumably macros.xml).
70 @NORM_GENOTYPES_OUTPUT_FULL@
71 ]]></command>
72 <inputs>
73 <param name="input_col" type="data_collection" collection_type="list" format="tabular,txt" label="Output from previous Stacks pipeline steps (e.g. denovo_map or refmap)"/> <!-- list collection whose elements the command links into stacks_outputs -->
74
75 <section title="Genotyping options" name="options_usage">
76 <!-- Cross type, passed to genotypes as -t; the choices are supplied by the cross_types macro -->
77 <param name="cross_type" type="select" argument="-t" label="Cross type">
78 <expand macro="cross_types" />
79 </param>
80 <!-- Output map format (-o); only the genomic format asks for a restriction enzyme (-e) -->
81 <conditional name="map_out">
82 <param name="map_out_type" type="select" argument="-o" label="Output file type" help="Output map file type to write">
83 <option value="joinmap">JoinMap</option>
84 <option value="onemap">OneMap</option>
85 <option value="rqtl">R/QTL</option>
86 <option value="genomic">Genomic</option>
87 </param>
88 <when value="genomic">
89 <param name="enzyme" type="select" argument="-e" label="Restriction enzyme used" help="Only needed for Genomic output format">
90 <expand macro="enzymes" />
91 </param>
92 </when>
93 </conditional>
94 </section>
95
96 <conditional name="options_autocorr"> <!-- the thresholds below exist only while automated corrections (-c) are enabled -->
97 <param name="corrections" argument="-c" type="boolean" checked="true" truevalue="-c" falsevalue="" label="Make automated corrections to the data" />
98 <when value="-c">
99 <param name="hom" argument="--min_hom_seqs" type="integer" value="5" label="Minimum number of reads required at a stack to call a homozygous genotype" />
100 <param name="het" argument="--min_het_seqs" type="float" value="0.05" label="Heterozygote minor allele minimum frequency" help="below this minor allele frequency a stack is called a homozygote, above it (but below --max_het_seqs) it is called unknown" />
101 <param name="hetmax" argument="--max_het_seqs" type="float" value="0.1" label="Heterozygote minor allele maximum frequency" help="minimum frequency of minor allele to call a heterozygote" />
102 </when>
103 <when value="">
104 </when>
105 </conditional>
106
107 <!-- Advanced options: optional filters, marker lists, manual corrections and batch selection -->
108 <section title="advanced options" name="advanced_options" expanded="False">
109 <param name="minprogeny" argument="-r" type="integer" optional="true" value="0" label="Minimum number of progeny required to print a marker" />
110 <param name="mindepth" argument="-m" type="integer" optional="true" value="" label="Minimum stack depth required before exporting a locus in a particular individual" />
111 <param name="lnl" argument="--lnl_lim" type="float" optional="true" value="" label="Filter loci with log likelihood values below this threshold" />
112 <!-- Optional marker whitelist / blacklist datasets -->
113 <param name="whitelist" argument="-W" type="data" format="txt,tabular" optional="true" label="Specify a file containing Whitelisted markers to include in the export" />
114 <param name="blacklist" argument="-B" type="data" format="txt,tabular" optional="true" label="Specify a file containing Blacklisted markers to be excluded from the export" />
115 <!-- Optional file of manual genotype corrections -->
116 <param name="manual_cor" argument="--cor_path" type="data" format="tabular,txt" optional="true" label="Path to file containing manual genotype corrections from a Stacks SQL database to incorporate into output." />
117 <!-- Batch to export; the form default is 1 -->
118 <param name="batchid" type="integer" value="1" label="Batch ID to examine when exporting from the catalog" help="Only useful if you analyse data that was processed outside galaxy" />
119 </section>
120 </inputs>
121 <outputs> <!-- all output datasets come from a shared macro; its definition is not in this file (presumably macros.xml) -->
122 <expand macro="genotypes_output_full" />
123 </outputs>
124
125 <tests>
126 <test>
127 <param name="input_col">
128 <collection type="list">
129 <element name="batch_1.catalog.alleles.tsv" ftype="tabular" value="genotypes/batch_1.catalog.alleles.tsv" />
130 <element name="batch_1.catalog.snps.tsv" ftype="tabular" value="genotypes/batch_1.catalog.snps.tsv" />
131 <element name="batch_1.catalog.tags.tsv" ftype="tabular" value="genotypes/batch_1.catalog.tags.tsv" />
132 <element name="PopA_01.alleles.tsv" ftype="tabular" value="genotypes/PopA_01.alleles.tsv" />
133 <element name="PopA_01.matches.tsv" ftype="tabular" value="genotypes/PopA_01.matches.tsv" />
134 <element name="PopA_01.snps.tsv" ftype="tabular" value="genotypes/PopA_01.snps.tsv" />
135 <element name="PopA_01.tags.tsv" ftype="tabular" value="genotypes/PopA_01.tags.tsv" />
136 <element name="PopA_02.alleles.tsv" ftype="tabular" value="genotypes/PopA_02.alleles.tsv" />
137 <element name="PopA_02.matches.tsv" ftype="tabular" value="genotypes/PopA_02.matches.tsv" />
138 <element name="PopA_02.snps.tsv" ftype="tabular" value="genotypes/PopA_02.snps.tsv" />
139 <element name="PopA_02.tags.tsv" ftype="tabular" value="genotypes/PopA_02.tags.tsv" />
140 </collection>
141 </param>
142 <param name="options_usage|map_out|map_out_type" value="joinmap" />
143 <param name="options_usage|cross_type" value="CP" />
144 <param name="advanced_options|minprogeny" value="1" />
145 <!-- Nested parameters use their full section|conditional|name path. Outputs are checked for a marker string only, not for exact content. -->
146 <!-- genotypes -->
147 <output name="out_generic_haplo">
148 <assert_contents>
149 <has_text text="Catalog ID" />
150 </assert_contents>
151 </output>
152 <output name="out_sql_markers">
153 <assert_contents>
154 <has_text text="Total Genotypes" />
155 </assert_contents>
156 </output>
157 <output name="out_joinmap">
158 <assert_contents>
159 <has_text text="batch_1.genotypes_" />
160 </assert_contents>
161 </output>
162 <output name="out_sql_genotypes">
163 <assert_contents>
164 <has_text text="SQL ID" />
165 </assert_contents>
166 </output>
167 </test>
168 </tests>
169 <help>
170 <![CDATA[
171 .. class:: infomark
172
173 **What it does**
174
175 This program exports a Stacks data set either as a set of observed haplotypes at each locus in the population, or with the haplotypes encoded into genotypes. The -r option allows only loci that exist in a certain number of population individuals to be exported. In a mapping context, raising or lowering this limit is an effective way to control the quality level of markers exported, as genuine markers will be found in a large number of progeny. If exporting a set of observed haplotypes in a population, the "min stack depth" option can be used to restrict exported loci to those that have a minimum depth of reads.
176
177 By default, when executing the pipeline (either denovo_map or ref_map) the genotypes program will be executed last and will identify mappable markers in the population and export both a set of observed haplotypes and a set of generic genotypes with "min number of progeny" option = 1.
178
179
180 **Making Corrections**
181
182 If enabled with the "make automated corrections to the data" option, the genotypes program will make automated corrections to the data. Since loci are matched up in the population, the script can correct false-negative heterozygote alleles since it knows the existence of alleles at a particular locus in the other individuals. For example, the program will identify loci with SNPs that didn’t have high enough coverage to be identified by the SNP caller. It will also check that homozygous tags have a minimum depth of coverage, since a low-coverage polymorphic locus may appear homozygous simply because the other allele wasn’t sequenced.
183
184
185 **Correction Thresholds**
186
187 The thresholds for automatic corrections can be modified by enabling the "Make automated corrections to the data" option and changing the default values of the "Minimum number of reads required at a stack to call a homozygous genotype" (min_hom_seqs), "Heterozygote minor allele minimum frequency" (min_het_seqs) and "Heterozygote minor allele maximum frequency" (max_het_seqs) parameters. "Minimum number of reads required at a stack to call a homozygous genotype" is the minimum number of reads required to consider a stack homozygous (default of 5). The "Heterozygote minor allele minimum frequency" and "Heterozygote minor allele maximum frequency" values represent fractions. If the ratio of the depth of the smaller allele to the bigger allele is greater than "Heterozygote minor allele maximum frequency" (default of 1/10) a stack is called a het. If the ratio is less than "Heterozygote minor allele minimum frequency" (default of 1/20) a stack is called homozygous. If the ratio is in between the two values it is unknown and a genotype will not be assigned.
188
189 Automated corrections made by the program are shown in the output file in capital letters.
190
191 --------
192
193 **Input files**
194
195 Output from denovo_map or ref_map
196
197 **Output files:**
198
199 - XXX.tags.tsv file:
200
201 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
202
203 Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.
204
205
206 - XXX.snps.tsv file:
207
208 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
209
210 Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.
211
212
213 - XXX.alleles.tsv file:
214
215 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
216
217
218 - XXX.matches.tsv file:
219
220 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
221
222 Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample.
223
224
225 - other files:
226
227 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
228
229 @STACKS_INFOS@
230 ]]>
231 </help>
232 <expand macro="citation" />
233 </tool>
234