comparison variant_select.xml @ 11:0d369d08ad6e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gatk2 commit 344140b8df53b8b7024618bb04594607a045c03a
author iuc
date Mon, 04 May 2015 22:47:06 -0400
parents b80301676614
children 669a23f1f4b5
comparison
equal deleted inserted replaced
10:5db8d6815cf3 11:0d369d08ad6e
1 <tool id="gatk2_variant_select" name="Select Variants" version="@VERSION@.0"> 1 <tool id="gatk2_variant_select" name="Select Variants" version="@VERSION@.0">
2 <description>from VCF files</description> 2 <description>from VCF files</description>
3 <expand macro="requirements" />
4 <expand macro="version_command" />
5 <macros> 3 <macros>
6 <import>gatk2_macros.xml</import> 4 <import>gatk2_macros.xml</import>
7 </macros> 5 </macros>
6 <expand macro="requirements" />
7 <expand macro="version_command" />
8 <command interpreter="python"> 8 <command interpreter="python">
9 #from binascii import hexlify 9 #from binascii import hexlify
10 10
11 gatk2_wrapper.py 11 gatk2_wrapper.py
12 --stdout "${output_log}" 12 --stdout "${output_log}"
16 -T "SelectVariants" 16 -T "SelectVariants"
17 \$GATK2_SITE_OPTIONS 17 \$GATK2_SITE_OPTIONS
18 18
19 @THREADS@ 19 @THREADS@
20 -o "${output_vcf}" 20 -o "${output_vcf}"
21 21
22 #if $reference_source.reference_source_selector != "history": 22 #if $reference_source.reference_source_selector != "history":
23 -R "${reference_source.ref_file.fields.path}" 23 -R "${reference_source.ref_file.fields.path}"
24 #end if 24 #end if
25 ' 25 '
26 -p ' 26 -p '
28 --concordance "${input_concordance}" 28 --concordance "${input_concordance}"
29 #end if 29 #end if
30 #if $input_discordance: 30 #if $input_discordance:
31 --discordance "${input_discordance}" 31 --discordance "${input_discordance}"
32 #end if 32 #end if
33 33
34 #for $exclude_sample_name in $exclude_sample_name_repeat: 34 #for $exclude_sample_name in $exclude_sample_name_repeat:
35 --exclude_sample_name "${exclude_sample_name.exclude_sample_name}" 35 --exclude_sample_name "${exclude_sample_name.exclude_sample_name}"
36 #end for 36 #end for
37 37
38 ${exclude_filtered} 38 ${exclude_filtered}
39 39
40 #for $sample_name in $sample_name_repeat: 40 #for $sample_name in $sample_name_repeat:
41 --sample_name "${sample_name.sample_name}" 41 --sample_name "${sample_name.sample_name}"
42 #end for 42 #end for
43 ' 43 '
44 44
45 #for $select_expressions in $select_expressions_repeat: 45 #for $select_expressions in $select_expressions_repeat:
46 #set $select_expression = "--select_expressions '%s'" % ( str( $select_expressions.select_expressions ) ) 46 #set $select_expression = "--select_expressions '%s'" % ( str( $select_expressions.select_expressions ) )
47 -o '${ hexlify( $select_expression ) }' 47 -o '${ hexlify( $select_expression ) }'
48 #end for 48 #end for
49 49
50 ##start tool specific options 50 ##start tool specific options
51 #if str( $analysis_param_type.analysis_param_type_selector ) == 'advanced': 51 #if str( $analysis_param_type.analysis_param_type_selector ) == 'advanced':
52 -p ' 52 -p '
53 #for $exclude_sample_file in $analysis_param_type.exclude_sample_file_repeat: 53 #for $exclude_sample_file in $analysis_param_type.exclude_sample_file_repeat:
54 --exclude_sample_file "${exclude_sample_file.exclude_sample_file}" 54 --exclude_sample_file "${exclude_sample_file.exclude_sample_file}"
55 #end for 55 #end for
56 56
57 #for $sample_file in $analysis_param_type.sample_file_repeat: 57 #for $sample_file in $analysis_param_type.sample_file_repeat:
58 --sample_file "${sample_file.sample_file}" 58 --sample_file "${sample_file.sample_file}"
59 #end for 59 #end for
60 60
61 #if $analysis_param_type.input_keep_ids: 61 #if $analysis_param_type.input_keep_ids:
62 --keepIDs "${analysis_param_type.input_keep_ids}" 62 --keepIDs "${analysis_param_type.input_keep_ids}"
63 #end if 63 #end if
64 64
65 ${analysis_param_type.keep_original_AC} 65 ${analysis_param_type.keep_original_AC}
66 66
67 ${analysis_param_type.mendelian_violation} 67 ${analysis_param_type.mendelian_violation}
68 68
69 --mendelianViolationQualThreshold "${analysis_param_type.mendelian_violation_qual_threshold}" 69 --mendelianViolationQualThreshold "${analysis_param_type.mendelian_violation_qual_threshold}"
70 70
71 --remove_fraction_genotypes "${analysis_param_type.remove_fraction_genotypes}" 71 --remove_fraction_genotypes "${analysis_param_type.remove_fraction_genotypes}"
72 72
73 --restrictAllelesTo "${analysis_param_type.restrict_alleles_to}" 73 --restrictAllelesTo "${analysis_param_type.restrict_alleles_to}"
74 74
75 #if str( $analysis_param_type.select_random_type.select_random_type_selector ) == 'select_random_fraction': 75 #if str( $analysis_param_type.select_random_type.select_random_type_selector ) == 'select_random_fraction':
76 --select_random_fraction "${analysis_param_type.select_random_type.select_random_fraction}" 76 --select_random_fraction "${analysis_param_type.select_random_type.select_random_fraction}"
77 #elif str( $analysis_param_type.select_random_type.select_random_type_selector ) == 'select_random_number': 77 #elif str( $analysis_param_type.select_random_type.select_random_type_selector ) == 'select_random_number':
78 --select_random_number "${analysis_param_type.select_random_type.select_random_number}" 78 --select_random_number "${analysis_param_type.select_random_type.select_random_number}"
79 #end if 79 #end if
80 80
81 #if $analysis_param_type.select_type_to_include: 81 #if $analysis_param_type.select_type_to_include:
82 #for $type_to_include in str( $analysis_param_type.select_type_to_include ).split( ',' ): 82 #for $type_to_include in str( $analysis_param_type.select_type_to_include ).split( ',' ):
83 --selectTypeToInclude "${type_to_include}" 83 --selectTypeToInclude "${type_to_include}"
84 #end for 84 #end for
85 #end if 85 #end if
86 86
87 ${analysis_param_type.exclude_non_variants} 87 ${analysis_param_type.exclude_non_variants}
88 ' 88 '
89 89
90 #for $sample_expressions in $analysis_param_type.sample_expressions_repeat: 90 #for $sample_expressions in $analysis_param_type.sample_expressions_repeat:
91 #set $sample_expression = "--sample_expressions '%s'" % ( str( $sample_expressions.sample_expressions ) ) 91 #set $sample_expression = "--sample_expressions '%s'" % ( str( $sample_expressions.sample_expressions ) )
92 -o '${ hexlify( $sample_expression ) }' 92 -o '${ hexlify( $sample_expression ) }'
93 #end for 93 #end for
94 94
95 #end if 95 #end if
96 ##end tool specific options 96 ##end tool specific options
97 97
98 #include source=$standard_gatk_options# 98 #include source=$standard_gatk_options#
99
100
101 </command> 99 </command>
102 <inputs> 100 <inputs>
103 <conditional name="reference_source"> 101 <conditional name="reference_source">
104 <expand macro="reference_source_selector_param" /> 102 <expand macro="reference_source_selector_param" />
105 <when value="cached"> 103 <when value="cached">
114 <when value="history"> <!-- FIX ME!!!! --> 112 <when value="history"> <!-- FIX ME!!!! -->
115 <param name="input_variant" type="data" format="vcf" label="Variant file to select" help="-V,--variant &amp;lt;variant&amp;gt;" /> 113 <param name="input_variant" type="data" format="vcf" label="Variant file to select" help="-V,--variant &amp;lt;variant&amp;gt;" />
116 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" /> 114 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
117 </when> 115 </when>
118 </conditional> 116 </conditional>
119 117
120 <repeat name="select_expressions_repeat" title="Criteria to use when selecting the data" help="-select,--select_expressions &amp;lt;select_expressions&amp;gt;"> 118 <repeat name="select_expressions_repeat" title="Criteria to use when selecting the data" help="-select,--select_expressions &amp;lt;select_expressions&amp;gt;">
121 <param name="select_expressions" type="text" label="JEXL expression"> 119 <param name="select_expressions" type="text" label="JEXL expression">
122 <sanitizer> 120 <sanitizer>
123 <valid initial="string.printable"> 121 <valid initial="string.printable">
124 <remove value="&apos;"/> 122 <remove value="&apos;"/>
125 </valid> 123 </valid>
126 <mapping initial="none"/> 124 <mapping initial="none"/>
127 </sanitizer> 125 </sanitizer>
128 </param> 126 </param>
129 </repeat> 127 </repeat>
130 128
131 <param name="input_concordance" type="data" format="vcf" label="Output variants that were also called in this comparison track" optional="True" help="-conc,--concordance &amp;lt;concordance&amp;gt;"/> 129 <param name="input_concordance" type="data" format="vcf" label="Output variants that were also called in this comparison track" optional="True" help="-conc,--concordance &amp;lt;concordance&amp;gt;"/>
132 <param name="input_discordance" type="data" format="vcf" label="Output variants that were not called in this comparison track" optional="True" help="-disc,--discordance &amp;lt;discordance&amp;gt;"/> 130 <param name="input_discordance" type="data" format="vcf" label="Output variants that were not called in this comparison track" optional="True" help="-disc,--discordance &amp;lt;discordance&amp;gt;"/>
133 131
134 <repeat name="sample_name_repeat" title="Include Samples by name" help="-sn,--sample_name &amp;lt;sample_name&amp;gt;"> 132 <repeat name="sample_name_repeat" title="Include Samples by name" help="-sn,--sample_name &amp;lt;sample_name&amp;gt;">
135 <param name="sample_name" type="text" label="Include genotypes from this sample"/> 133 <param name="sample_name" type="text" label="Include genotypes from this sample"/>
136 </repeat> 134 </repeat>
137 135
138 <repeat name="exclude_sample_name_repeat" title="Exclude Samples by name" help="-xl_sn,--exclude_sample_name &amp;lt;exclude_sample_name&amp;gt;"> 136 <repeat name="exclude_sample_name_repeat" title="Exclude Samples by name" help="-xl_sn,--exclude_sample_name &amp;lt;exclude_sample_name&amp;gt;">
139 <param name="exclude_sample_name" type="text" label="Exclude genotypes from this sample"/> 137 <param name="exclude_sample_name" type="text" label="Exclude genotypes from this sample"/>
140 </repeat> 138 </repeat>
141 139
142 <param name="exclude_filtered" type="boolean" truevalue="--excludeFiltered" falsevalue="" label="Don't include filtered loci in the analysis" help="-ef,--excludeFiltered" /> 140 <param name="exclude_filtered" type="boolean" truevalue="--excludeFiltered" falsevalue="" label="Don't include filtered loci in the analysis" help="-ef,--excludeFiltered" />
143 141
144 <expand macro="gatk_param_type_conditional" /> 142 <expand macro="gatk_param_type_conditional" />
145 143
146
147 <expand macro="analysis_type_conditional"> 144 <expand macro="analysis_type_conditional">
148 145
149 <repeat name="exclude_sample_file_repeat" title="Exclude Samples by file" help="-xl_sf,--exclude_sample_file &amp;lt;exclude_sample_file&amp;gt;"> 146 <repeat name="exclude_sample_file_repeat" title="Exclude Samples by file" help="-xl_sf,--exclude_sample_file &amp;lt;exclude_sample_file&amp;gt;">
150 <param name="exclude_sample_file" type="data" format="txt" label="File containing a list of samples (one per line) to exclude"/> 147 <param name="exclude_sample_file" type="data" format="txt" label="File containing a list of samples (one per line) to exclude"/>
151 </repeat> 148 </repeat>
152 149
153 <repeat name="sample_file_repeat" title="Samples by file" help="-sf,--sample_file &amp;lt;sample_file&amp;gt;"> 150 <repeat name="sample_file_repeat" title="Samples by file" help="-sf,--sample_file &amp;lt;sample_file&amp;gt;">
154 <param name="sample_file" type="data" format="txt" label="File containing a list of samples (one per line) to include" /> 151 <param name="sample_file" type="data" format="txt" label="File containing a list of samples (one per line) to include" />
155 </repeat> 152 </repeat>
156 153
157 <param name="input_keep_ids" type="data" format="txt" label="Only emit sites whose ID is found in this file" optional="True" help="-IDs,--keepIDs &amp;lt;keepIDs&amp;gt;"/> 154 <param name="input_keep_ids" type="data" format="txt" label="Only emit sites whose ID is found in this file" optional="True" help="-IDs,--keepIDs &amp;lt;keepIDs&amp;gt;"/>
158 155
159 <param name="keep_original_AC" type="boolean" truevalue="--keepOriginalAC" falsevalue="" label="Don't update the AC, AF, or AN values in the INFO field after selecting" help="-keepOriginalAC,--keepOriginalAC" /> 156 <param name="keep_original_AC" type="boolean" truevalue="--keepOriginalAC" falsevalue="" label="Don't update the AC, AF, or AN values in the INFO field after selecting" help="-keepOriginalAC,--keepOriginalAC" />
160 157
161 <param name="mendelian_violation" type="boolean" truevalue="--mendelianViolation" falsevalue="" label="output mendelian violation sites only" help="-mv,--mendelianViolation" /> 158 <param name="mendelian_violation" type="boolean" truevalue="--mendelianViolation" falsevalue="" label="output mendelian violation sites only" help="-mv,--mendelianViolation" />
162 159
163 <param name="mendelian_violation_qual_threshold" type="float" label="Minimum genotype QUAL score for each trio member required to accept a site as a mendelian violation" value="0" help="-mvq,--mendelianViolationQualThreshold &amp;lt;mendelianViolationQualThreshold&amp;gt;" /> 160 <param name="mendelian_violation_qual_threshold" type="float" label="Minimum genotype QUAL score for each trio member required to accept a site as a mendelian violation" value="0" help="-mvq,--mendelianViolationQualThreshold &amp;lt;mendelianViolationQualThreshold&amp;gt;" />
164 161
165 <param name="remove_fraction_genotypes" type="float" label="Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall" value="0" min="0" max="1" help="-fractionGenotypes,--remove_fraction_genotypes &amp;lt;remove_fraction_genotypes&amp;gt;" /> 162 <param name="remove_fraction_genotypes" type="float" label="Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall" value="0" min="0" max="1" help="-fractionGenotypes,--remove_fraction_genotypes &amp;lt;remove_fraction_genotypes&amp;gt;" />
166 163
167 <param name="restrict_alleles_to" type="select" label="Select only variants of a particular allelicity" help="-restrictAllelesTo,--restrictAllelesTo &amp;lt;restrictAllelesTo&amp;gt;"> 164 <param name="restrict_alleles_to" type="select" label="Select only variants of a particular allelicity" help="-restrictAllelesTo,--restrictAllelesTo &amp;lt;restrictAllelesTo&amp;gt;">
168 <option value="ALL" selected="True">ALL</option> 165 <option value="ALL" selected="True">ALL</option>
169 <option value="MULTIALLELIC">MULTIALLELIC</option> 166 <option value="MULTIALLELIC">MULTIALLELIC</option>
170 <option value="BIALLELIC">BIALLELIC</option> 167 <option value="BIALLELIC">BIALLELIC</option>
171 </param> 168 </param>
172 169
173 <repeat name="sample_expressions_repeat" title="Regular expression to select many samples from the ROD tracks provided" help="-se,--sample_expressions &amp;lt;sample_expressions&amp;gt;"> 170 <repeat name="sample_expressions_repeat" title="Regular expression to select many samples from the ROD tracks provided" help="-se,--sample_expressions &amp;lt;sample_expressions&amp;gt;">
174 <param name="sample_expressions" type="text" label="Regular expression"> 171 <param name="sample_expressions" type="text" label="Regular expression">
175 <sanitizer> 172 <sanitizer>
176 <valid initial="string.printable"> 173 <valid initial="string.printable">
177 <remove value="&apos;"/> 174 <remove value="&apos;"/>
178 </valid> 175 </valid>
179 <mapping initial="none"/> 176 <mapping initial="none"/>
180 </sanitizer> 177 </sanitizer>
181 </param> 178 </param>
182 </repeat> 179 </repeat>
183 180
184 <conditional name="select_random_type"> 181 <conditional name="select_random_type">
185 <param name="select_random_type_selector" type="select" label="Select a random subset of variants"> 182 <param name="select_random_type_selector" type="select" label="Select a random subset of variants">
186 <option value="select_all" selected="True">Use all variants</option> 183 <option value="select_all" selected="True">Use all variants</option>
187 <option value="select_random_fraction">Select random fraction</option> 184 <option value="select_random_fraction">Select random fraction</option>
188 <option value="select_random_number">Select random number</option> 185 <option value="select_random_number">Select random number</option>
195 </when> 192 </when>
196 <when value="select_random_number"> 193 <when value="select_random_number">
197 <param name="select_random_number" type="integer" value="0" label="Count" help="-number,--select_random_number &amp;lt;select_random_number&amp;gt;" /> 194 <param name="select_random_number" type="integer" value="0" label="Count" help="-number,--select_random_number &amp;lt;select_random_number&amp;gt;" />
198 </when> 195 </when>
199 </conditional> 196 </conditional>
200 197
201 <param name="exclude_non_variants" type="boolean" truevalue="--excludeNonVariants" falsevalue="" label="Don't include loci found to be non-variant after the subsetting procedure" help="-env,--excludeNonVariants" /> 198 <param name="exclude_non_variants" type="boolean" truevalue="--excludeNonVariants" falsevalue="" label="Don't include loci found to be non-variant after the subsetting procedure" help="-env,--excludeNonVariants" />
202 199
203 <param name="select_type_to_include" type="select" label="Select only a certain type of variants from the input file" multiple="True" display="checkboxes" help="-selectType,--selectTypeToInclude &amp;lt;selectTypeToInclude&amp;gt;"> 200 <param name="select_type_to_include" type="select" label="Select only a certain type of variants from the input file" multiple="True" display="checkboxes" help="-selectType,--selectTypeToInclude &amp;lt;selectTypeToInclude&amp;gt;">
204 <option value="INDEL">INDEL</option> 201 <option value="INDEL">INDEL</option>
205 <option value="SNP">SNP</option> 202 <option value="SNP">SNP</option>
206 <option value="MIXED">MIXED</option> 203 <option value="MIXED">MIXED</option>
207 <option value="MNP">MNP</option> 204 <option value="MNP">MNP</option>
208 <option value="SYMBOLIC">SYMBOLIC</option> 205 <option value="SYMBOLIC">SYMBOLIC</option>
209 <option value="NO_VARIATION">NO_VARIATION</option> 206 <option value="NO_VARIATION">NO_VARIATION</option>
210 </param> 207 </param>
211 </expand> 208 </expand>
212 209
213 </inputs> 210 </inputs>
214 <outputs> 211 <outputs>
215 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (Variant File)" /> 212 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (Variant File)" />
216 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" /> 213 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
217 </outputs> 214 </outputs>
226 <param name="exclude_sample_name_repeat" value="0" /> 223 <param name="exclude_sample_name_repeat" value="0" />
227 <param name="exclude_filtered" /> 224 <param name="exclude_filtered" />
228 <param name="sample_name_repeat" value="0" /> 225 <param name="sample_name_repeat" value="0" />
229 <param name="gatk_param_type_selector" value="basic" /> 226 <param name="gatk_param_type_selector" value="basic" />
230 <param name="analysis_param_type_selector" value="basic" /> 227 <param name="analysis_param_type_selector" value="basic" />
231 <output name="output_vcf" file="gatk/gatk_variant_select/gatk_variant_select_out_1.vcf" lines_diff="4" /> 228 <output name="output_vcf" file="gatk/gatk_variant_select/gatk_variant_select_out_1.vcf" lines_diff="4" />
232 <output name="output_log" file="gatk/gatk_variant_select/gatk_variant_select_out_1.log.contains" compare="contains" /> 229 <output name="output_log" file="gatk/gatk_variant_select/gatk_variant_select_out_1.log.contains" compare="contains" />
233 </test> 230 </test>
234 </tests> 231 </tests>
235 <help> 232 <help>
236 **What it does** 233 **What it does**
237 234
238 Often, a VCF containing many samples and/or variants will need to be subset in order to facilitate certain analyses (e.g. comparing and contrasting cases vs. controls; extracting variant or non-variant loci that meet certain requirements, displaying just a few samples in a browser like IGV, etc.). SelectVariants can be used for this purpose. Given a single VCF file, one or more samples can be extracted from the file (based on a complete sample name or a pattern match). Variants can be further selected by specifying criteria for inclusion, e.g. "DP &gt; 1000" (depth of coverage greater than 1000x), "AF &lt; 0.25" (sites with allele frequency less than 0.25). These JEXL expressions are documented in the `Using JEXL expressions section &lt;http://gatkforums.broadinstitute.org/discussion/1255/what-are-jexl-expressions-and-how-can-i-use-them-with-the-gatk&gt;`_. One can optionally include concordance or discordance tracks for use in selecting overlapping variants. 235 Often, a VCF containing many samples and/or variants will need to be subset in order to facilitate certain analyses (e.g. comparing and contrasting cases vs. controls; extracting variant or non-variant loci that meet certain requirements, displaying just a few samples in a browser like IGV, etc.). SelectVariants can be used for this purpose. Given a single VCF file, one or more samples can be extracted from the file (based on a complete sample name or a pattern match). Variants can be further selected by specifying criteria for inclusion, e.g. "DP &gt; 1000" (depth of coverage greater than 1000x), "AF &lt; 0.25" (sites with allele frequency less than 0.25). These JEXL expressions are documented in the `Using JEXL expressions section &lt;http://gatkforums.broadinstitute.org/discussion/1255/what-are-jexl-expressions-and-how-can-i-use-them-with-the-gatk&gt;`_. One can optionally include concordance or discordance tracks for use in selecting overlapping variants.
239 236
240 For more information on using the SelectVariants module, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_variantutils_SelectVariants.html&gt;`_. 237 For more information on using the SelectVariants module, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_variantutils_SelectVariants.html&gt;`_.
241 238
242 To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_. 239 To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_.
243 240