comparison freebayes.xml @ 2:14f952d2a9db draft

Uploaded
author devteam
date Thu, 11 Dec 2014 18:38:34 -0500
parents e21073b0dc1f
children 9f3d6c3098ac
comparison
equal deleted inserted replaced
1:ef435b7b0420 2:14f952d2a9db
1 <?xml version="1.0"?> 1 <?xml version="1.0"?>
2 <tool id="freebayes" name="FreeBayes" version="0.0.3"> 2 <tool id="freebayes" name="FreeBayes" version="0.3">
3 <requirements> 3 <requirements>
4 <requirement type="package" version="0.9.6_9608597d12e127c847ae03aa03440ab63992fedf">freebayes</requirement> 4 <requirement type="package" version="0.9.18_0059bdf">freebayes</requirement>
5 <requirement type="package" version="0.1.18">samtools</requirement> 5 <requirement type="package" version="0.1.18">samtools</requirement>
6 </requirements> 6 </requirements>
7 <description> - Bayesian genetic variant detector</description> 7 <description> - bayesian genetic variant detector</description>
8 <command> 8 <command>
9 ##set up input files 9 ##set up input files
10
10 #set $reference_fasta_filename = "localref.fa" 11 #set $reference_fasta_filename = "localref.fa"
12
11 #if str( $reference_source.reference_source_selector ) == "history": 13 #if str( $reference_source.reference_source_selector ) == "history":
12 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &amp;&amp; 14 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &amp;&amp;
13 samtools faidx "${reference_fasta_filename}" 2&gt;&amp;1 || echo "Error running samtools faidx for FreeBayes" &gt;&amp;2 &amp;&amp; 15 samtools faidx "${reference_fasta_filename}" 2&gt;&amp;1 || echo "Error running samtools faidx for FreeBayes" &gt;&amp;2 &amp;&amp;
14 #else: 16 #else:
15 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path ) 17 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
16 #end if 18 #end if
19
17 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ): 20 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
18 ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" &amp;&amp; 21 ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" &amp;&amp;
19 ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &amp;&amp; 22 ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &amp;&amp;
20 #end for 23 #end for
24
25 ## Tabixize optional input_variant_vcf file (for --variant-input option)
26
27 #if ( str( $options_type.options_type_selector ) == 'cline' or str( $options_type.options_type_selector ) == 'full' ) and $options_type.optional_inputs.optional_inputs_selector and str( $options_type.optional_inputs.input_variant_type.input_variant_type_selector ) == "provide_vcf":
28 ln -s "${options_type.optional_inputs.input_variant_type.input_variant_vcf}" "input_variant_vcf.vcf.gz" &amp;&amp;
29 ln -s "${Tabixized_input}" "input_variant_vcf.vcf.gz.tbi" &amp;&amp;
30 #end if
31
21 ##finished setting up inputs 32 ##finished setting up inputs
22 33
23 ##start FreeBayes commandline 34 ##COMMAND LINE STARTS HERE
35
24 freebayes 36 freebayes
25 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ): 37 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
26 --bam "localbam_${bam_count}.bam" 38 --bam "localbam_${bam_count}.bam"
27 #end for 39 #end for
28 --fasta-reference "${reference_fasta_filename}" 40 --fasta-reference "${reference_fasta_filename}"
29 41
30 ##outputs 42 ##outputs
31 --vcf "${output_vcf}" 43 --vcf "${output_vcf}"
32 44
45 #if str( $target_limit_type.target_limit_type_selector ) == "limit_by_target_file":
46 --targets "${target_limit_type.input_target_bed}"
47 #elif str( $target_limit_type.target_limit_type_selector ) == "limit_by_region":
48 --region "${target_limit_type.region_chromosome}:${target_limit_type.region_start}..${target_limit_type.region_end}"
49 #end if
50
33 ##advanced options 51 ##advanced options
34 #if str( $options_type.options_type_selector ) == "advanced": 52 #if str( $options_type.options_type_selector ) == "simple":
35 ##additional outputs 53 ##do nothing as the command line built up to this point is sufficient for simple diploid calling
36 #if $options_type.output_trace_option: 54
55 #elif str( $options_type.options_type_selector ) == "simple_w_filters":
56
57 --standard-filters
58 --min-coverage "${options_type.min_coverage}"
59
60 #elif str( $options_type.options_type_selector ) == "naive":
61
62 --haplotype-length 0
63 --min-alternate-count 1
64 --min-alternate-fraction 0
65 --pooled-continuous
66 --report-monomorphic
67
68 #elif str( $options_type.options_type_selector ) == "naive_w_filters":
69
70 --haplotype-length 0
71 --min-alternate-count 1
72 --min-alternate-fraction 0
73 --pooled-continuous
74 --report-monomorphic
75 --standard-filters
76 --min-coverage "${options_type.min_coverage}"
77
78 ## Command line direct text entry is not allowed at this time for security reasons
79
80 ## #elif str( $options_type.options_type_selector ) == "cline":
81
82 ## ${options_type.cline}
83
84 ## @optional_inputs_outputs@
85
86 #elif str( $options_type.options_type_selector ) == "full":
87
88 ##optional inputs and outputs
89
90 @optional_inputs_outputs@
91
92 ## REPORTING (reporting_selector is a boolean whose truevalue is "set", so its string form is compared against "set")
93
94 #if str( $options_type.reporting.reporting_selector ) == "set":
95 --pvar ${options_type.reporting.pvar}
96 #end if
97
98 ## POPULATION MODEL (population_model_selector is a boolean whose truevalue is "set"; J and K expand to their flag or to an empty string)
99
100 #if str( $options_type.population_model.population_model_selector ) == "set":
101 --theta "${options_type.population_model.T}"
102 --ploidy "${options_type.population_model.P}"
103 ${options_type.population_model.J}
104 ${options_type.population_model.K}
105
106 #end if
107
108 ## REFERENCE ALLELE (reference_allele_selector is a boolean whose truevalue is "set"; Z expands to -Z or to an empty string)
109
110 #if str( $options_type.reference_allele.reference_allele_selector ) == "set":
111 ${options_type.reference_allele.Z}
112 --reference-quality "${options_type.reference_allele.reference_quality}"
113 #end if
114
115 ## ALLELE SCOPE (allele_scope_selector is a boolean whose truevalue is "set"; the boolean params below expand to their flag or to an empty string)
116
117 #if str( $options_type.allele_scope.allele_scope_selector ) == "set":
118 ${options_type.allele_scope.I}
119 ${options_type.allele_scope.i}
120 ${options_type.allele_scope.X}
121 ${options_type.allele_scope.u}
122 -n "${options_type.allele_scope.n}"
123 --haplotype-length "${options_type.allele_scope.haplotype_length}"
124 --min-repeat-size "${options_type.allele_scope.min_repeat_length}"
125 --min-repeat-entropy "${options_type.allele_scope.min_repeat_entropy}"
126 ${options_type.allele_scope.no_partial_observations}
127 #end if
128
129 ## REALIGNMENT
130
131 ${options_type.O}
132
133 ##INPUT FILTERS
134
135 #if str( $options_type.input_filters.input_filters_selector ) == "True":
136 ${options_type.input_filters.use_duplicate_reads}
137 -m "${options_type.input_filters.m}"
138 -q "${options_type.input_filters.q}"
139 -R "${options_type.input_filters.R}"
140 -Y "${options_type.input_filters.Y}"
141
142 #if str( $options_type.input_filters.mismatch_filters.mismatch_filters_selector ) == "True":
143 -Q "${options_type.input_filters.mismatch_filters.Q}"
144 -U "${options_type.input_filters.mismatch_filters.U}"
145 -z "${options_type.input_filters.mismatch_filters.z}"
146 --read-snp-limit "${options_type.input_filters.mismatch_filters.read_snp_limit}"
147 #end if
148
149 -e "${options_type.input_filters.e}"
150 -F "${options_type.input_filters.F}"
151 -C "${options_type.input_filters.C}"
152 --min-alternate-qsum "${options_type.input_filters.min_alternate_qsum}"
153 -G "${options_type.input_filters.G}"
154 --min-coverage "${options_type.input_filters.min_coverage}"
155 #end if
156
157 ## POPULATION AND MAPPABILITY PRIORS
158
159 #if str( $options_type.population_mappability_priors.population_mappability_priors_selector ) == "True":
160 ${options_type.population_mappability_priors.k}
161 ${options_type.population_mappability_priors.w}
162 ${options_type.population_mappability_priors.V}
163 ${options_type.population_mappability_priors.a}
164 #end if
165
166 ## GENOTYPE LIKELIHOODS
167
168 #if str( $options_type.genotype_likelihoods.genotype_likelihoods_selector ) == "True":
169 --base-quality-cap "${$options_type.genotype_likelihoods.base_quality_cap}"
170 ${$options_type.genotype_likelihoods.experimental_gls}
171 --prob-contamination "${$options_type.genotype_likelihoods.prob_contamination}"
172 #end if
173
174 ## ALGORITHMIC FEATURES
175
176 #if str( $options_type.algorithmic_features.algorithmic_features_selector ) == "True":
177 ${options_type.algorithmic_features.report_genotype_likelihood_max}
178 -B "${options_type.algorithmic_features.B}"
179 --genotyping-max-banddepth "${options_type.algorithmic_features.genotyping_max_banddepth}"
180 -W "${options_type.algorithmic_features.W}"
181 ${options_type.algorithmic_features.N}
182
183 #if str( $options_type.algorithmic_features.genotype_variant_threshold.genotype_variant_threshold_selector ) == "True":
184 -S "${options_type.algorithmic_features.genotype_variant_threshold.S}"
185 #end if
186
187 ${options_type.algorithmic_features.j}
188 ${options_type.algorithmic_features.H}
189 -D "${options_type.algorithmic_features.D}"
190 ${options_type.algorithmic_features.genotype_qualities}
191 #end if
192 #end if
193
194 </command>
195
196 <macros>
197 <token name="@optional_inputs_outputs@">
198 ## This token gets injected into the command in two cases: when options_type.options_type_selector == "full" and "cline" ( cline is not supported at this time )
199
200 #if $options_type.optional_inputs.optional_inputs_selector:
201
202 #if $options_type.optional_inputs.output_trace_option:
37 --trace "${output_trace}" 203 --trace "${output_trace}"
204 #end if
205
206 #if $options_type.optional_inputs.output_failed_alleles_option:
207 --failed-alleles "${output_failed_alleles_bed}"
208 #end if
209
210 #if $options_type.optional_inputs.samples:
211 --samples "${options_type.optional_inputs.samples}"
212 #end if
213
214 #if $options_type.optional_inputs.populations:
215 --populations "${options_type.optional_inputs.populations}"
216 #end if
217
218 #if $options_type.optional_inputs.A:
219 --cnv-map "${options_type.optional_inputs.A}"
220 #end if
221
222 #if str( $options_type.optional_inputs.input_variant_type.input_variant_type_selector ) == "provide_vcf":
223 --variant-input "input_variant_vcf.vcf.gz" ## input_variant_vcf.vcf.gz is symlinked to a galaxy-generated dataset in "Tabixize optional input_varinat_vcf file" section of the command line above
224 ${options_type.optional_inputs.input_variant_type.only_use_input_alleles}
225 #end if
226
227 #if $options_type.optional_inputs.haplotype_basis_alleles:
228 --haplotype-basis-alleles "${options_type.optional_inputs.haplotype_basis_alleles}"
229 #end if
230
231 #if $options_type.optional_inputs.observation_bias:
232 --observation-bias "${options_type.optional_inputs.observation_bias}"
233 #end if
234
235 #if $options_type.optional_inputs.contamination_estimates:
236 --contamination-estimates "${options_type.optional_inputs.contamination_estimates}"
237 #end if
238
38 #end if 239 #end if
39 #if $options_type.output_failed_alleles_option: 240 </token>
40 --failed-alleles "${output_failed_alleles_bed}" 241 <xml name="optional_file_inputs">
41 #end if 242 <conditional name="optional_inputs">
42 243 <param name="optional_inputs_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Do you want to provide additional inputs?" help="Sets --samples, --populations, --cnv-map, --trace, --failed-alleles, --variant-input, --only-use-input-alleles, --haplotype-basis-alleles, --report-all-haplotype-alleles, --report-monomorphic options, --observation-bias, and --contamination-estimates" />
43 ##additional inputs 244 <when value="set">
44 #if str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_target_file": 245 <param name="output_failed_alleles_option" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="False" label="Write out failed alleles file" help="--failed-alleles" />
45 --targets "${options_type.target_limit_type.input_target_bed}" 246 <param name="output_trace_option" type="boolean" truevalue="--trace" falsevalue="" checked="False" label="Write out algorithm trace file" help="--trace"/>
46 #elif str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_region": 247 <param name="samples" type="data" format="txt" label="Limit analysis to samples listed (one per line) in the FILE" optional="True" help="-s --samples; default=By default FreeBayes will analyze all samples in its input BAM files"/>
47 --region "${options_type.target_limit_type.region_chromosome}:${options_type.target_limit_type.region_start}..${options_type.target_limit_type.region_end}" 248 <param name="populations" type="data" format="txt" label="Populations File" optional="True" help="--populations; default=False. Each line of FILE should list a sample and a population which it is part of. The population-based bayesian inference model will then be partitioned on the basis of the populations" />
48 #end if 249 <param name="A" type="data" format="bed" label="Read a copy number map from the BED file FILE" optional="True" help="-A --cnv-map; default=copy number is set to as specified by --ploidy. Read a copy number map from the BED file FILE, which has the format: reference sequence, start, end, sample name, copy number ... for each region in each sample which does not have the default copy number as set by --ploidy."/>
49 #if $options_type.input_sample_file: 250 <conditional name="input_variant_type">
50 --samples "${options_type.input_sample_file}" 251 <param name="input_variant_type_selector" type="select" label="Provide variants file">
51 #end if 252 <option value="do_not_provide" selected="True">Do not provide</option>
52 #if $options_type.input_populations_file: 253 <option value="provide_vcf">Provide VCF file</option>
53 --populations "${options_type.input_populations_file}" 254 </param>
54 #end if 255 <when value="do_not_provide">
55 #if $options_type.input_cnv_map_bed: 256 <!-- Do nothing here -->
56 --cnv-map "${options_type.input_cnv_map_bed}" 257 </when>
57 #end if 258 <when value="provide_vcf">
58 #if str( $options_type.input_variant_type.input_variant_type_selector ) == "provide_vcf": 259 <param name="input_variant_vcf" type="data" format="vcf_bgzip" label="Use variants reported in VCF file as input to the algorithm">
59 --variant-input "${options_type.input_variant_type.input_variant_vcf}" 260 <conversion name="Tabixized_input" type="tabix" />
60 ${options_type.input_variant_type.only_use_input_alleles} 261 </param>
61 #end if 262 <param name="only_use_input_alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="False" label="Only provide variant calls and genotype likelihoods for sites in VCF" />
62 #if $options_type.haplotype_basis_alleles: 263 </when>
63 --haplotype-basis-alleles "${options_type.haplotype_basis_alleles}" 264 </conditional>
64 #end if 265 <param name="haplotype_basis_alleles" type="data" format="vcf" label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" optional="True" help="--haplotype-basis-alleles" />
65 266 <param name="report_monomorphic" type="boolean" truevalue="--report-monomorphic" falsevalue="" checked="False" label="Report even loci which appear to be monomorphic, and report all considered alleles, even those which are not in called genotypes." help="--report-monomorphic " />
66 267 <param name="observation_bias" optional="True" type="data" format="tabular" label="Load read length-dependent allele observation biases from" help="--observation-bias; The format is [length] [alignment efficiency relative to reference] where the efficiency is 1 if there is no relative observation bias" />
67 ##reporting 268 <param name="contamination_estimates" optional="True" type="data" format="tabular" label="Upload per-sample estimates of contamination from" help="--contamination-estimates; The format should be: sample p(read=R|genotype=AR) p(read=A|genotype=AA) Sample '*' can be used to set default contamination estimates." />
68 #if str( $options_type.section_reporting_type.section_reporting_type_selector ) == "set": 269 </when>
69 --pvar "${options_type.section_reporting_type.pvar}" 270 <when value="do_not_set">
70 ${options_type.section_reporting_type.show_reference_repeats} 271 <!-- do nothing -->
71 #end if 272 </when>
72 273 </conditional>
73 ##population model 274 </xml>
74 #if str( $options_type.section_population_model_type.section_population_model_type_selector ) == "set": 275 </macros>
75 --theta "${options_type.section_population_model_type.theta}" 276
76 --ploidy "${options_type.section_population_model_type.ploidy}"
77 ${options_type.section_population_model_type.pooled}
78 #end if
79
80 ##reference allele
81 #if str( $options_type.use_reference_allele_type.use_reference_allele_type_selector ) == "include_reference_allele":
82 --use-reference-allele
83 ${options_type.use_reference_allele_type.diploid_reference}
84 --reference-quality "${options_type.use_reference_allele_type.reference_quality_mq},${options_type.use_reference_allele_type.reference_quality_bq}"
85 #end if
86
87 ##allele scope
88 #if str( $options_type.section_allele_scope_type.section_allele_scope_type_selector ) == "set":
89 ${options_type.section_allele_scope_type.no_snps}
90 ${options_type.section_allele_scope_type.no_indels}
91 ${options_type.section_allele_scope_type.no_mnps}
92 ${options_type.section_allele_scope_type.no_complex}
93 --use-best-n-alleles "${options_type.section_allele_scope_type.use_best_n_alleles}"
94 #if $options_type.section_allele_scope_type.max_complex_gap:
95 --max-complex-gap "${options_type.section_allele_scope_type.max_complex_gap}"
96 #end if
97 #end if
98
99 ##indel realignment
100 ${options_type.left_align_indels}
101
102 ##input filters
103 #if str( $options_type.section_input_filters_type.section_input_filters_type_selector ) == "set":
104 ${options_type.section_input_filters_type.use_duplicate_reads}
105 #if str( $options_type.section_input_filters_type.quality_filter_type.quality_filter_type_selector ) == "apply_filters":
106 --min-mapping-quality "${options_type.section_input_filters_type.quality_filter_type.min_mapping_quality}"
107 --min-base-quality "${options_type.section_input_filters_type.quality_filter_type.min_base_quality}"
108 --min-supporting-quality "${options_type.section_input_filters_type.quality_filter_type.min_supporting_quality_mq},${options_type.section_input_filters_type.quality_filter_type.min_supporting_quality_bq}"
109 #elif str( $options_type.section_input_filters_type.quality_filter_type.quality_filter_type_selector ) == "standard_filters":
110 --standard-filters
111 #end if
112 --mismatch-base-quality-threshold "${options_type.section_input_filters_type.mismatch_base_quality_threshold}"
113 #if $options_type.section_input_filters_type.read_mismatch_limit:
114 --read-mismatch-limit "${options_type.section_input_filters_type.read_mismatch_limit}"
115 #end if
116 --read-max-mismatch-fraction "${options_type.section_input_filters_type.read_max_mismatch_fraction}"
117 #if $options_type.section_input_filters_type.read_snp_limit:
118 --read-snp-limit "${options_type.section_input_filters_type.read_snp_limit}"
119 #end if
120 #if $options_type.section_input_filters_type.read_indel_limit:
121 --read-indel-limit "${options_type.section_input_filters_type.read_indel_limit}"
122 #end if
123 --indel-exclusion-window "${options_type.section_input_filters_type.indel_exclusion_window}"
124 --min-alternate-fraction "${options_type.section_input_filters_type.min_alternate_fraction}"
125 --min-alternate-count "${options_type.section_input_filters_type.min_alternate_count}"
126 --min-alternate-qsum "${options_type.section_input_filters_type.min_alternate_qsum}"
127 --min-alternate-total "${options_type.section_input_filters_type.min_alternate_total}"
128 --min-coverage "${options_type.section_input_filters_type.min_coverage}"
129 #end if
130
131 ##bayesian priors
132 #if str( $options_type.section_bayesian_priors_type.section_bayesian_priors_type_selector ) == "set":
133 ${options_type.section_bayesian_priors_type.no_ewens_priors}
134 ${options_type.section_bayesian_priors_type.no_population_priors}
135 ${options_type.section_bayesian_priors_type.hwe_priors}
136 #end if
137
138 ##observation prior expectations
139 #if str( $options_type.section_observation_prior_expectations_type.section_observation_prior_expectations_type_selector ) == "set":
140 ${options_type.section_observation_prior_expectations_type.binomial_obs_priors}
141 ${options_type.section_observation_prior_expectations_type.allele_balance_priors}
142 #end if
143
144 ##algorithmic features
145 #if str( $options_type.section_algorithmic_features_type.section_algorithmic_features_type_selector ) == "set":
146 --site-selection-max-iterations "${options_type.section_algorithmic_features_type.site_selection_max_iterations}"
147 --genotyping-max-iterations "${options_type.section_algorithmic_features_type.genotyping_max_iterations}"
148 --genotyping-max-banddepth "${options_type.section_algorithmic_features_type.genotyping_max_banddepth}"
149 --posterior-integration-limits "${options_type.section_algorithmic_features_type.posterior_integration_limits_n},${options_type.section_algorithmic_features_type.posterior_integration_limits_m}"
150 ${options_type.section_algorithmic_features_type.no_permute}
151 ${options_type.section_algorithmic_features_type.exclude_unobserved_genotypes}
152 #if $options_type.section_algorithmic_features_type.genotype_variant_threshold:
153 --genotype-variant-threshold "${options_type.section_algorithmic_features_type.genotype_variant_threshold}"
154 #end if
155 ${options_type.section_algorithmic_features_type.use_mapping_quality}
156 --read-dependence-factor "${options_type.section_algorithmic_features_type.read_dependence_factor}"
157 ${options_type.section_algorithmic_features_type.no_marginals}
158 #end if
159
160 #end if
161 </command>
162 <inputs> 277 <inputs>
163 <conditional name="reference_source"> 278 <conditional name="reference_source">
164 <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> 279 <param name="reference_source_selector" type="select" label="Load reference genome from">
165 <option value="cached">Locally cached</option> 280 <option value="cached">Local cache</option>
166 <option value="history">History</option> 281 <option value="history">History</option>
167 </param> 282 </param>
168 <when value="cached"> 283 <when value="cached">
169 <repeat name="input_bams" title="Sample BAM file" min="1"> 284 <repeat name="input_bams" title="Sample BAM file" min="1">
170 <param name="input_bam" type="data" format="bam" label="BAM file"> 285 <param name="input_bam" type="data" format="bam" label="BAM file">
171 <validator type="unspecified_build" /> 286 <validator type="unspecified_build" />
172 <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." /> 287 <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." />
173 </param> 288 </param>
174 </repeat> 289 </repeat>
290
175 <param name="ref_file" type="select" label="Using reference genome"> 291 <param name="ref_file" type="select" label="Using reference genome">
176 <options from_data_table="sam_fa_indexes"> 292 <options from_data_table="fasta_indexes"></options>
177 <!-- <filter type="sam_fa_indexes" key="dbkey" ref="input_bam" column="value"/> does not yet work in a repeat...-->
178 </options>
179 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> 293 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
180 </param> 294 </param>
181 </when> 295 </when>
182 <when value="history"> <!-- FIX ME!!!! --> 296 <when value="history"> <!-- FIX ME!!!! -->
183 <repeat name="input_bams" title="Sample BAM file" min="1"> 297 <repeat name="input_bams" title="Sample BAM file" min="1">
184 <param name="input_bam" type="data" format="bam" label="BAM file" /> 298 <param name="input_bam" type="data" format="bam" label="BAM file" />
185 </repeat> 299 </repeat>
186 <param name="ref_file" type="data" format="fasta" label="Using reference file" /> 300 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" />
187 </when> 301 </when>
188 </conditional> 302 </conditional>
189 303
190 <conditional name="options_type"> 304 <conditional name="target_limit_type">
191 <param name="options_type_selector" type="select" label="Basic or Advanced options"> 305 <param name="target_limit_type_selector" type="select" label="Limit variant calling to a set of regions?" help="Sets --targets or --region options">
192 <option value="basic" selected="True">Basic</option> 306 <option value="do_not_limit" selected="True">Do not limit</option>
193 <option value="advanced">Advanced</option> 307 <option value="limit_by_target_file">Limit by target file</option>
308 <option value="limit_by_region">Limit to region</option>
194 </param> 309 </param>
195 <when value="basic"> 310 <when value="do_not_limit">
196 <!-- Do nothing here --> 311 <!-- Do nothing here -->
197 </when> 312 </when>
198 <when value="advanced"> 313 <when value="limit_by_target_file">
199 314 <param name="input_target_bed" type="data" format="bed" label="Limit analysis to targets listed in the BED-format FILE." help="-t --targets"/>
200 <!-- output --> 315 </when>
201 <param name="output_failed_alleles_option" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="False" label="Write out failed alleles file" /> 316 <when value="limit_by_region">
202 <param name="output_trace_option" type="boolean" truevalue="--trace" falsevalue="" checked="False" label="Write out algorithm trace file" /> 317 <param name="region_chromosome" type="text" label="Region Chromosome" value="" help="-r --region"/> <!--only once? -->
203 318 <param name="region_start" type="integer" label="Region Start" value="" />
204 319 <param name="region_end" type="integer" label="Region End" value="" />
205 <!-- input --> 320 </when>
206 <conditional name="target_limit_type"> 321 </conditional>
207 <param name="target_limit_type_selector" type="select" label="Limit analysis to listed targets"> 322
208 <option value="do_not_limit" selected="True">Do not limit</option> 323 <conditional name="options_type">
209 <option value="limit_by_target_file">Limit by target file</option> 324 <param name="options_type_selector" type="select" label="Choose parameter selection level" help="Select how much control over the freebayes run you need" >
210 <option value="limit_by_region">Limit to region</option> 325 <option value="simple" selected="True">1:Simple diploid calling</option>
211 </param> 326 <option value="simple_w_filters">2:Simple diploid calling with filtering and coverage</option>
212 <when value="do_not_limit"> 327 <option value="naive">3:Frequency-based pooled calling</option>
213 <!-- Do nothing here --> 328 <option value="naive_w_filters">4:Frequency-based pooled calling with filtering and coverage</option>
329 <option value="full">5:Complete list of all options</option>
330 <!-- We will not allow command line text boxes at this time
331 <option value="cline">6:Input parameters on the command line</option>
332 -->
333 </param>
334 <when value="full">
335
336 <expand macro="optional_file_inputs" /> <!-- see macros section -->
337
338 <!-- reporting -->
339
340 <conditional name="reporting">
341 <param name="reporting_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set reporting option?" help="Sets -P --pvar option" />
342 <when value="set">
343 <param name="pvar" type="float" value="0.0" label="Report sites if the probability that there is a polymorphism at the site is greater than" help="-P --pvar; default=0.0. Note that post-filtering is generally recommended over the use of this parameter. " />
214 </when> 344 </when>
215 <when value="limit_by_target_file"> 345 <when value="do_not_set">
216 <param name="input_target_bed" type="data" format="bed" label="Limit analysis to targets listed in the BED-format FILE." /> 346 <!-- do nothing -->
217 </when>
218 <when value="limit_by_region">
219 <param name="region_chromosome" type="text" label="Region Chromosome" value="" /> <!--only once? -->
220 <param name="region_start" type="integer" label="Region Start" value="" />
221 <param name="region_end" type="integer" label="Region End" value="" />
222 </when> 347 </when>
223 </conditional> 348 </conditional>
224 <param name="input_sample_file" type="data" format="txt" label="Limit analysis to samples listed (one per line) in the FILE" optional="True" /> 349
225 <param name="input_populations_file" type="data" format="txt" label="Populations File" optional="True" /> 350 <!-- population model -->
226 <param name="input_cnv_map_bed" type="data" format="bed" label="Read a copy number map from the BED file FILE" optional="True" /> 351
227 <conditional name="input_variant_type"> 352 <conditional name="population_model">
228 <param name="input_variant_type_selector" type="select" label="Provide variants file"> 353 <param name="population_model_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set population model?" help="Sets --theta, --ploidy, --pooled-discrete, and --pooled-continuous options " />
229 <option value="do_not_provide" selected="True">Do not provide</option> 354 <when value="set">
230 <option value="provide_vcf">Provide VCF file</option> 355 <param name="T" type="float" value="0.001" label="The expected mutation rate or pairwise nucleotide diversity among the population under analysis" help="-T --theta; default = 0.001. This serves as the single parameter to the Ewens Sampling Formula prior model." />
231 </param> 356 <param name="P" type="integer" value="2" label="Set ploidy for the analysis" help="-p --ploidy; default=2" />
232 <when value="do_not_provide"> 357 <param name="J" type="boolean" truevalue="-J" falsevalue="" checked="False" label="Assume that samples result from pooled sequencing" help="-J --pooled-discrete; default=False. Model pooled samples using discrete genotypes across pools. When using this flag, set --ploidy to the number of alleles in each sample or use the --cnv-map to define per-sample ploidy." />
233 <!-- Do nothing here --> 358 <param name="K" type="boolean" truevalue="-K" falsevalue="" checked="False" label="Output all alleles which pass input filters, regardless of genotyping outcome or model" help="-K, --pooled-continuous; default=False. " />
359 </when>
360 <when value="do_not_set">
361 <!-- do nothing -->
362 </when>
363 </conditional>
364
365 <!-- reference allele -->
366
367 <conditional name="reference_allele">
368 <param name="reference_allele_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Use reference allele?" help="Sets --use-reference-allele and --reference-quality options " />
369 <when value="set">
370 <param name="Z" type="boolean" truevalue="-Z" falsevalue="" checked="False" label="Include the reference allele in the analysis as if it is another sample from the same population" help="-Z --use-reference-allele; default=False" />
371 <param name="reference_quality" type="text" size="8" value="100,60" label="Assign mapping quality of MQ (100) to the reference allele at each site and base quality of BQ (60)" help="--reference-quality; default=100,60 " />
372 </when>
373 <when value="do_not_set">
374 <!-- do nothing -->
375 </when>
376 </conditional>
377
378 <!-- allelic scope -->
379
380 <conditional name="allele_scope">
381 <param name="allele_scope_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set allelic scope?" help="Sets -I, -i, -X, -u, -n, --haplotype-length, --min-repeat-size, --min-repeat-entropy, and --no-partial-observations options " />
382 <when value="set">
383 <param name="I" type="boolean" truevalue="-I" falsevalue="" checked="False" label="Ignore SNP alleles" help="-I --no-snps; default=False" />
384 <param name="i" type="boolean" truevalue="-i" falsevalue="" checked="False" label="Ignore indels alleles" help="-i --no-indels; default=False" />
385 <param name="X" type="boolean" truevalue="-X" falsevalue="" checked="False" label="Ignore multi-nucleotide polymorphisms, MNPs" help="-X --no-mnps; default=False" />
386 <param name="u" type="boolean" truevalue="-u" falsevalue="" checked="False" label="Ignore complex events (composites of other classes)." help="-u --no-complex; default=False" />
387 <param name="n" type="integer" value="0" label="How many best SNP alleles to evaluate" help="-n --use-best-n-alleles; default=0 (all). Alleles are ranked by the sum of supporting quality scores. Set to 0 to evaluate all" />
388 <param name="haplotype_length" type="integer" value="3" label="Allow haplotype calls with contiguous embedded matches of up to (nucleotides)" help="-E --max-complex-gap --haplotype-length; default=3." />
389 <param name="min_repeat_length" type="integer" value="5" label="When assembling observations across repeats, require the total repeat length at least this many bp" help="--min-repeat-size; default=5." />
390 <param name="min_repeat_entropy" type="integer" value="0" label="To detect interrupted repeats, build across sequence until it has entropy > (bits per bp)" help="--min-repeat-entropy; default=0 (off)." />
391 <param name="no_partial_observations" type="boolean" truevalue="--no-partial-observations" falsevalue="" checked="False" label="Exclude observations which do not fully span the dynamically-determined detection window" help="--no-partial-observations; default=use all observations, dividing partial support across matching haplotypes when generating haplotypes. " />
392 </when>
393 <when value="do_not_set">
394 <!-- do nothing -->
395 </when>
396 </conditional>
397
398 <!-- indel realignment -->
399
400 <param name="O" type="boolean" truevalue="-O" falsevalue="" checked="False" label="Turn off left-alignment of indels?" help="-O --dont-left-align-indels; default=False (do left align). " />
401
402 <!-- input filters -->
403
404 <conditional name="input_filters">
405 <param name="input_filters_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set input filters?" help="Sets -4, -m, -q, -R, -Y, -Q, -U, -z, -&#36;, -e, -0, -F, -C, -3, -G, and -&#33; options " />
406 <when value="set">
407 <param name="use_duplicate_reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="False" label="Include duplicate-marked alignments in the analysis." help="-4 --use-duplicate-reads; default=False (exclude duplicates marked as such in alignments)." />
408 <param name="m" type="integer" value="1" label="Exclude alignments from analysis if they have a mapping quality less than" help="-m --min-mapping-quality; default=1" />
409 <param name="q" type="integer" value="0" label="Exclude alleles from analysis if their supporting base quality less than" help="-q --min-base-quality; default=0" />
410 <param name="R" type="integer" value="0" label="Consider any allele in which the sum of qualities of supporting observations is at least" help="-R --min-supporting-allele-qsum; default=0" />
411 <param name="Y" type="integer" value="0" label="Consider any allele in which the sum of mapping qualities of supporting reads is at least" help="-Y --min-supporting-mapping-qsum; default=0" />
412 <conditional name="mismatch_filters">
413 <param name="mismatch_filters_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Perform mismatch filtering?" help="Sets -Q, -U, -z, and -&#36; options" />
414 <when value="set">
415 <param name="Q" type="integer" value="10" label="Count mismatches toward -U (option below) if the base quality of the mismatch is >=" help="-Q --mismatch-base-quality-threshold; default=10" />
416 <param name="U" type="integer" value="1000" optional="True" label="Exclude reads with more than N mismatches where each mismatch has base quality >= Q (option above)" help="-U --read-mismatch-limit; default=~unbound" />
417 <param name="z" type="float" value="1.0" min="0.0" max="1.0" label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality >= Q (second option above)" help="-z --read-max-mismatch-fraction; default=1.0" />
418 <param name="read_snp_limit" type="integer" value="1000" label="Exclude reads with more than N base mismatches, ignoring gaps with quality >= Q (third option above)" help="-&#36; --read-snp-limit N " />
419 </when>
420 <when value="do_not_set">
421 <!-- do nothing -->
422 </when>
423 </conditional>
424 <param name="e" type="integer" value="1000" label="Exclude reads with more than this number of separate gaps" help="-e --read-indel-limit; default=~unbounded" />
425 <param name="standard_filters" type="boolean" truevalue="-0" falsevalue="" checked="False" label="Use stringent input base and mapping quality filters" help="-0 --standard-filters; default=False. Equivalent to -m 30 -q 20 -R 0 -S 0" />
426 <param name="F" type="float" value="0.2" label="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position" help="-F --min-alternate-fraction; default=0.2" />
427 <param name="C" type="integer" value="2" label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" help="-C --min-alternate-count; default=2" />
428 <param name="min_alternate_qsum" type="integer" value="0" label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" help="-3 --min-alternate-qsum; default=0" />
429 <param name="G" type="integer" value="1" label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" help="-G --min-alternate-total N; default=1" />
430 <param name="min_coverage" type="integer" value="0" label="Require at least this coverage to process a site" help="-! --min-coverage; default=0 " />
431 </when>
432 <when value="do_not_set">
433 <!-- do nothing -->
434 </when>
435 </conditional>
436
437 <!-- population and mappability priors -->
438
439 <conditional name="population_mappability_priors">
440 <param name="population_mappability_priors_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set population and mappability priors?" help="Sets -k, -w, -V, and -a options " />
441 <when value="set">
442 <param name="k" type="boolean" truevalue="-k" falsevalue="" checked="False" label="No population priors" help="-k --no-population-priors; default=False. Equivalent to --pooled-discrete --hwe-priors-off and removal of Ewens Sampling Formula component of priors." />
443 <param name="w" type="boolean" truevalue="-w" falsevalue="" checked="False" label="Disable estimation of the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" help="-w --hwe-priors-off; default=False" />
444 <param name="V" type="boolean" truevalue="-V" falsevalue="" checked="False" label="Disable incorporation of prior expectations about observations" help="-V --binomial-obs-priors-off; default=False. Uses read placement probability, strand balance probability, and read position (5&#39;-3&#39;) probability." />
445 <param name="a" type="boolean" truevalue="-a" falsevalue="" checked="False" label="Disable use of aggregate probability of observation balance between alleles as a component of the priors" help="-a --allele-balance-priors-off; default=False " />
446 </when>
447 <when value="do_not_set">
448 <!-- do nothing -->
449 </when>
450 </conditional>
451
452 <!-- genotype likelihoods -->
453
454 <conditional name="genotype_likelihoods">
455 <param name="genotype_likelihoods_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Tweak genotype likelihoods?" help="Sets --base-quality-cap, --experimental-gls, and --prob-contamination options. " />
456 <when value="set">
457 <param name="base_quality_cap" type="integer" value="0" label="Limit estimated observation quality by capping base quality at" help="--base-quality-cap" />
458 <param name="experimental_gls" type="boolean" truevalue="--experimental-gls" falsevalue="" checked="False" label="Generate genotype likelihoods using 'effective base depth' metric qual = 1-BaseQual * 1-MapQual" help="--experimental-gls; Incorporate partial observations. This is the default when contamination estimates are provided. Optimized for diploid samples." />
459 <param name="prob_contamination" type="float" value="10e-9" label="An estimate of contamination to use for all samples. " help="--prob-contamination; default=10e-9." />
460 </when>
461 <when value="do_not_set">
462 <!-- do nothing -->
463 </when>
464 </conditional>
465
466 <!-- algorithmic features -->
467
468 <conditional name="algorithmic_features">
469 <param name="algorithmic_features_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Tweak algorithmic features?" help="Sets --report-genotype-likelihood-max, -B, --genotyping-max-banddepth, -W, -N, -S, -j, -H, -D, -= options " />
470 <when value="set">
471 <param name="report_genotype_likelihood_max" type="boolean" truevalue="--report-genotype-likelihood-max" falsevalue="" checked="False" label="Report genotypes using the maximum-likelihood estimate provided from genotype likelihoods." help="--report-genotype-likelihood-max; default=False" />
472 <param name="B" type="integer" value="1000" label="Iterate no more than N times during genotyping step" help="-B --genotyping-max-iterations; default=1000." />
473 <param name="genotyping_max_banddepth" type="integer" value="6" label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" help="--genotyping-max-banddepth; default=6" />
474 <param name="W" type="text" size="8" value="1,3" label="Integrate all genotype combinations in our posterior space which include no more than N (1) samples with their Mth (3) best data likelihood" help="-W --posterior-integration-limits; default=1,3" />
475 <param name="N" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="False" label="Skip sample genotypings for which the sample has no supporting reads" help="-N --exclude-unobserved-genotypes; default=False" />
476 <conditional name="genotype_variant_threshold">
477 <param name="genotype_variant_threshold_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Do you want to limit posterior integration?" help="-S --genotype-variant-threshold" />
478 <when value="do_not_set">
479 <!-- do nothing -->
234 </when> 480 </when>
235 <when value="provide_vcf"> 481 <when value="set">
236 <param name="input_variant_vcf" type="data" format="vcf" label="Use variants reported in VCF file as input to the algorithm" /> 482 <param name="S" value="" type="integer" label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample." help="-S --genotype-variant-threshold; default=~unbounded" />
237 <param name="only_use_input_alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="False" label="Only provide variant calls and genotype likelihoods for sites in VCF" />
238 </when> 483 </when>
239 </conditional> 484 </conditional>
240 <param name="haplotype_basis_alleles" type="data" format="vcf" label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" optional="True" /> 485 <param name="j" type="boolean" truevalue="-j" falsevalue="" checked="False" label="Use mapping quality of alleles when calculating data likelihoods" help="-j --use-mapping-quality; default=False" />
241 486 <param name="H" type="boolean" truevalue="-H" falsevalue="" checked="False" label="Use a weighted sum of base qualities around an indel, scaled by the distance from the indel" help="-H --harmonic-indel-quality; default=use a minimum Base Quality in flanking sequence." />
242 <!-- reporting --> 487 <param name="D" type="float" value="0.9" label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" help="-D --read-dependence-factor; default=0.9." />
243 <conditional name="section_reporting_type"> 488 <param name="genotype_qualities" type="boolean" truevalue="--genotype-qualities" falsevalue="" checked="False" label="Calculate the marginal probability of genotypes and report as GQ in each sample field in the VCF output" help="-= --genotype-qualities; default=False " />
244 <param name="section_reporting_type_selector" type="select" label="Set Reporting options"> 489 </when>
245 <option value="do_not_set" selected="True">Do not set</option> 490 <when value="do_not_set">
246 <option value="set">Set</option> 491 <!-- do nothing -->
247 </param>
248 <when value="do_not_set">
249 <!-- do nothing here -->
250 </when>
251 <when value="set">
252 <param name="pvar" type="float" label="Report sites if the probability that there is a polymorphism at the site is greater" value="0.0001" />
253 <param name="show_reference_repeats" type="boolean" truevalue="--show-reference-repeats" falsevalue="" checked="False" label="Calculate and show information about reference repeats" />
254 </when>
255 </conditional>
256
257
258 <!-- population model -->
259 <conditional name="section_population_model_type">
260 <param name="section_population_model_type_selector" type="select" label="Set population model options">
261 <option value="do_not_set" selected="True">Do not set</option>
262 <option value="set">Set</option>
263 </param>
264 <when value="do_not_set">
265 <!-- do nothing here -->
266 </when>
267 <when value="set">
268 <param name="theta" type="float" label="expected mutation rate or pairwise nucleotide diversity among the population" value="0.001" help="This serves as the single parameter to the Ewens Sampling Formula prior model"/>
269 <param name="ploidy" type="integer" label="default ploidy for the analysis" value="2" />
270 <param name="pooled" type="boolean" truevalue="--pooled" falsevalue="" checked="False" label="Assume that samples result from pooled sequencing" help="When using this flag, set --ploidy to the number of alleles in each sample." />
271 </when>
272 </conditional>
273
274 <!-- reference allele -->
275 <conditional name="use_reference_allele_type">
276 <param name="use_reference_allele_type_selector" type="select" label="Include the reference allele in the analysis">
277 <option value="do_not_include_reference_allele" selected="True">Do not include</option>
278 <option value="include_reference_allele">Include</option>
279 </param>
280 <when value="do_not_include_reference_allele">
281 <!-- Do nothing here -->
282 </when>
283 <when value="include_reference_allele">
284 <param name="diploid_reference" type="boolean" truevalue="--diploid-reference" falsevalue="" checked="False" label="Treat reference as diploid" />
285 <param name="reference_quality_mq" type="integer" label="Assign mapping quality" value="100" />
286 <param name="reference_quality_bq" type="integer" label="Assign base quality" value="60" />
287 </when>
288 </conditional>
289
290 <!-- allele scope -->
291 <conditional name="section_allele_scope_type">
292 <param name="section_allele_scope_type_selector" type="select" label="Set allele scope options">
293 <option value="do_not_set" selected="True">Do not set</option>
294 <option value="set">Set</option>
295 </param>
296 <when value="do_not_set">
297 <!-- do nothing here -->
298 </when>
299 <when value="set">
300 <param name="no_snps" type="boolean" truevalue="--no-snps" falsevalue="" checked="False" label="Ignore SNP alleles" />
301 <param name="no_indels" type="boolean" truevalue="--no-indels" falsevalue="" checked="False" label="Ignore insertion and deletion alleles" />
302 <param name="no_mnps" type="boolean" truevalue="--no-mnps" falsevalue="" checked="False" label="Ignore multi-nucleotide polymorphisms, MNPs" />
303 <param name="no_complex" type="boolean" truevalue="--no-complex" falsevalue="" checked="False" label="Ignore complex events (composites of other classes)" />
304 <param name="use_best_n_alleles" type="integer" label="Evaluate only the best N SNP alleles" value="0" min="0" help="Ranked by sum of supporting quality scores; Set to 0 to use all" />
305 <param name="max_complex_gap" type="integer" label="Allow complex alleles with contiguous embedded matches of up to this length" value="" optional="True"/>
306 </when>
307 </conditional>
308
309 <!-- indel realignment -->
310 <param name="left_align_indels" type="boolean" truevalue="--left-align-indels" falsevalue="" checked="False" label="Left-realign and merge gaps embedded in reads" />
311
312 <!-- input filters -->
313 <conditional name="section_input_filters_type">
314 <param name="section_input_filters_type_selector" type="select" label="Set input filters options">
315 <option value="do_not_set" selected="True">Do not set</option>
316 <option value="set">Set</option>
317 </param>
318 <when value="do_not_set">
319 <!-- do nothing here -->
320 </when>
321 <when value="set">
322 <param name="use_duplicate_reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="False" label="Include duplicate-marked alignments in the analysis" />
323 <conditional name="quality_filter_type">
324 <param name="quality_filter_type_selector" type="select" label="Apply Quality filters">
325 <option value="standard_filters" selected="True">Apply standard</option>
326 <option value="apply_filters">Apply specified</option>
327 </param>
328 <when value="standard_filters">
329 <!-- Do nothing here --> <!-- standard-filters -->
330 </when>
331 <when value="apply_filters">
332 <param name="min_mapping_quality" type="integer" label="Exclude alignments from analysis if they have a mapping quality less than" value="0" />
333 <param name="min_base_quality" type="integer" label="Exclude alleles from analysis if their supporting base quality less than" value="0" />
334 <param name="min_supporting_quality_mq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have mapping quality" value="0" />
335 <param name="min_supporting_quality_bq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have base quality" value="0" />
336 </when>
337 </conditional>
338 <param name="mismatch_base_quality_threshold" type="integer" label="Count mismatches toward read-mismatch-limit if the base quality of the mismatch is &gt;=" value="10" />
339 <param name="read_mismatch_limit" type="integer" label="Exclude reads with more than N mismatches where each mismatch has base quality &gt;= mismatch-base-quality-threshold" value="" optional="True" />
340 <param name="read_max_mismatch_fraction" type="float" label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality &gt;= mismatch-base-quality-threshold" value="1.0" />
341 <param name="read_snp_limit" type="integer" label="Exclude reads with more than N base mismatches, ignoring gaps with quality &gt;= mismatch-base-quality-threshold" value="" optional="True" />
342 <param name="read_indel_limit" type="integer" label="Exclude reads with more than N separate gaps" value="" optional="True" />
343 <param name="indel_exclusion_window" type="integer" label="Ignore portions of alignments this many bases from a putative insertion or deletion allele" value="0" />
344 <param name="min_alternate_fraction" type="float" label="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" />
345 <param name="min_alternate_count" type="integer" label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" value="1" />
346 <param name="min_alternate_qsum" type="integer" label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" />
347 <param name="min_alternate_total" type="integer" label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" value="1" />
348 <param name="min_coverage" type="integer" label="Require at least this coverage to process a site" value="0" />
349 </when>
350 </conditional>
351
352
353 <!-- bayesian priors -->
354 <conditional name="section_bayesian_priors_type">
355 <param name="section_bayesian_priors_type_selector" type="select" label="Set bayesian priors options">
356 <option value="do_not_set" selected="True">Do not set</option>
357 <option value="set">Set</option>
358 </param>
359 <when value="do_not_set">
360 <!-- do nothing here -->
361 </when>
362 <when value="set">
363 <param name="no_ewens_priors" type="boolean" truevalue="--no-ewens-priors" falsevalue="" checked="False" label="Turns off the Ewens' Sampling Formula component of the priors" />
364 <param name="no_population_priors" type="boolean" truevalue="--no-population-priors" falsevalue="" checked="False" label="No population priors" help="Equivalent to --pooled --no-ewens-priors" />
365 <param name="hwe_priors" type="boolean" truevalue="--hwe-priors" falsevalue="" checked="False" label="Use the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" />
366 </when>
367 </conditional>
368
369 <!-- observation prior expectations -->
370 <conditional name="section_observation_prior_expectations_type">
371 <param name="section_observation_prior_expectations_type_selector" type="select" label="Set observation prior expectations options">
372 <option value="do_not_set" selected="True">Do not set</option>
373 <option value="set">Set</option>
374 </param>
375 <when value="do_not_set">
376 <!-- do nothing here -->
377 </when>
378 <when value="set">
379 <param name="binomial_obs_priors" type="boolean" truevalue="--binomial-obs-priors" falsevalue="" checked="False" label="Incorporate expectations about observations into the priors. Uses read placement probability, strand balance probability, and read position (5'-3') probability" />
380 <param name="allele_balance_priors" type="boolean" truevalue="--allele-balance-priors" falsevalue="" checked="False" label="Use aggregate probability of observation balance between alleles as a component of the priors. Best for observations with minimal inherent reference bias" />
381 </when>
382 </conditional>
383
384
385 <!-- algorithmic features -->
386 <conditional name="section_algorithmic_features_type">
387 <param name="section_algorithmic_features_type_selector" type="select" label="Set algorithmic features options">
388 <option value="do_not_set" selected="True">Do not set</option>
389 <option value="set">Set</option>
390 </param>
391 <when value="do_not_set">
392 <!-- do nothing here -->
393 </when>
394 <when value="set">
395 <param name="site_selection_max_iterations" type="integer" label="Uses hill-climbing algorithm to search posterior space for N iterations to determine if the site should be evaluated." value="5" help="Set to 0 to prevent use of this algorithm for site selection, and to a low integer for improved site selection at a slight performance penalty" />
396 <param name="genotyping_max_iterations" type="integer" label="Iterate no more than N times during genotyping step" value="25" />
397 <param name="genotyping_max_banddepth" type="integer" label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" value="6" />
398 <param name="posterior_integration_limits_n" type="integer" label="Posterior integration limit N" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="1" />
399 <param name="posterior_integration_limits_m" type="integer" label="Posterior integration limit M" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="3" />
400 <param name="no_permute" type="boolean" truevalue="--no-permute" falsevalue="" checked="False" label="Do not scale prior probability of genotype combination given allele frequency by the number of permutations of included genotypes" />
401 <param name="exclude_unobserved_genotypes" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="False" label="Skip sample genotypings for which the sample has no supporting reads" />
402 <param name="genotype_variant_threshold" type="integer" label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample" value="" optional="True" />
403 <param name="use_mapping_quality" type="boolean" truevalue="--use-mapping-quality" falsevalue="" checked="False" label="Use mapping quality of alleles when calculating data likelihoods" />
404 <param name="read_dependence_factor" type="float" label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" value="0.9" />
405 <param name="no_marginals" type="boolean" truevalue="--no-marginals" falsevalue="" checked="False" label="Do not calculate the marginal probability of genotypes. Saves time and improves scaling performance in large populations" />
406 </when>
407 </conditional>
408
409
410 </when> 492 </when>
411 </conditional> 493 </conditional>
412 494 </when>
495 <when value="simple">
496 <!-- do nothing -->
497 </when>
498 <when value="simple_w_filters">
499 <!-- add standard-filters to command line -->
500 <param name="min_coverage" type="integer" value="0" label="Require at least this coverage to process a site" help="-! --min-coverage; default=0 " />
501 </when>
502 <when value="naive">
503 <!-- do nothing build command line using haplotype-length 0 min-alternate-count 1 min-alternate-fraction 0 pooled-continuous report-monomorphic -->
504 </when>
505 <when value="naive_w_filters">
506 <!-- do nothing build command line using haplotype-length 0 min-alternate-count 1 min-alternate-fraction 0 pooled-continuous report-monomorphic standard-filters-->
507 <param name="min_coverage" type="integer" value="0" label="Require at least this coverage to process a site" help="-! --min-coverage; default=0 " />
508 </when>
509
510 <!-- We will not allow command line textboxes at this time
511 <when value="cline">
512
513 <expand macro="optional_file_inputs" />
514
515 <param name="cline" size="60" type="text" value="-m 20 -q 30" label="Type command line tags here" help="All parameters that DO NOT involve filenames can be typed here. Use &quot;Do you want to provide additional inputs?&quot; section above to control input and output files. For full syntax check help section below">
516 <sanitizer>
517 <valid initial="string.printable">
518 <remove value="&apos;"/>
519 </valid>
520 <mapping initial="none">
521 <add source="&apos;" target="__sq__"/>
522 </mapping>
523 </sanitizer>
524 </param>
525 </when>
526 -->
527
528 </conditional>
529
413 </inputs> 530 </inputs>
414 <outputs> 531 <outputs>
415 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (variants)" /> 532 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (variants)" />
416 <data format="bed" name="output_failed_alleles_bed" label="${tool.name} on ${on_string} (failed alleles)"> 533 <data format="bed" name="output_failed_alleles_bed" label="${tool.name} on ${on_string} (failed alleles)">
417 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_failed_alleles_option'] is True</filter> 534 <filter>( options_type['options_type_selector'] == 'cline' or options_type['options_type_selector'] == 'full' ) and options_type['optional_inputs']['optional_inputs_selector'] is True and options_type['optional_inputs']['output_failed_alleles_option'] is True</filter>
418 </data> 535 </data>
419 <data format="txt" name="output_trace" label="${tool.name} on ${on_string} (trace)"> 536 <data format="txt" name="output_trace" label="${tool.name} on ${on_string} (trace)">
420 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_trace_option'] is True</filter> 537 <filter>( options_type['options_type_selector'] == 'cline' or options_type['options_type_selector'] == 'full' ) and options_type['optional_inputs']['optional_inputs_selector'] is True and options_type['optional_inputs']['output_trace_option'] is True</filter>
421 </data> 538 </data>
422 </outputs> 539 </outputs>
423 <tests> 540 <tests>
424 <test> 541 <test>
425 <param name="reference_source_selector" value="history" /> 542 <param name="reference_source_selector" value="history" />
426 <param name="ref_file" ftype="fasta" value="phiX.fasta"/> 543 <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>
427 <param name="input_bam" ftype="bam" value="fake_phiX_reads_1.bam"/> 544 <param name="input_bam" ftype="bam" value="freebayes-phix174.bam"/>
428 <param name="options_type_selector" value="basic"/> 545 <param name="options_type_selector" value="simple"/>
429 <output name="output_vcf" file="freebayes_out_1.vcf.contains" compare="contains"/> 546 <output name="output_vcf" file="freebayes-phix174-test1.vcf" compare="contains"/>
430 </test> 547 </test>
431 </tests> 548 </tests>
549 <stdio>
550 <exit_code range="1:" />
551 </stdio>
432 <help> 552 <help>
433 **What it does** 553 **What it does**
434 554
435 This tool uses FreeBayes to call SNPS given a reference sequence and a BAM alignment file. 555 FreeBayes is a Bayesian genetic variant detector designed to find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms), indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and complex events (composite insertion and substitution events) smaller than the length of a short-read sequencing alignment.
436 556
437 FreeBayes is a high-performance, flexible, and open-source Bayesian genetic variant detector. It operates on BAM alignment files, which are produced by most contemporary short-read aligners. 557 See https://github.com/ekg/freebayes for details on FreeBayes.
438 558
439 In addition to substantial performance improvements over its predecessors (PolyBayes, GigaBayes, and BamBayes), it expands the scope of SNP and small-indel variant calling to populations of individuals with heterogeneous copy number. FreeBayes is currently under active development. 559 This Galaxy instance of FreeBayes corresponds to release 0.9.18
440
441 Go `here &lt;http://bioinformatics.bc.edu/marthlab/FreeBayes&gt;`_ for details on FreeBayes.
442 560
443 ------ 561 ------
444 562
445 **Inputs** 563 **Description**
446 564
447 FreeBayes accepts an input aligned BAM file. 565 Provided with BAM file(s) and a reference, FreeBayes will produce VCF output on standard out describing SNPs, indels, and complex variants in samples in the input alignments.
448 566
449 567 By default, FreeBayes will consider variants supported by at least 2 observations in a single sample (-C) and also by at least 20% of the reads from a single sample (-F). These settings are suitable for low to high depth sequencing in haploid and diploid samples, but users working with polyploid or pooled samples may wish to adjust them depending on the characteristics of their sequencing data.
450 **Outputs** 568
451 569 FreeBayes is capable of calling variant haplotypes shorter than a read length where multiple polymorphisms segregate on the same read. The maximum distance between polymorphisms phased in this way is determined by the --max-complex-gap, which defaults to 3bp. In practice, this can comfortably be set to half the read length.
452 The output is in the VCF format. 570
571 Ploidy may be set to any level (-p), but by default all samples are assumed to be diploid. FreeBayes can model per-sample and per-region variation in copy-number (-A) using a copy-number variation map.
572
573 FreeBayes can act as a frequency-based pooled caller and describe variants and haplotypes in terms of observation frequency rather than called genotypes. To do so, use --pooled-continuous and set input filters to a suitable level. Allele observation counts will be described by AO and RO fields in the VCF output.
453 574
454 ------- 575 -------
455 576
456 **Settings**:: 577 **Galaxy-specific options**
457 578
458 input and output: 579 Galaxy allows five levels of control over FreeBayes options provided by the **Choose parameter selection level** menu option. These are:
459 580
460 -b --bam FILE Add FILE to the set of BAM files to be analyzed. 581 1. *Simple diploid calling*: The simplest possible FreeBayes application. Equivalent of using FreeBayes with only a BAM input and no other parameter options.
461 -c --stdin Read BAM input on stdin. 582 2. *Simple diploid calling with filtering and coverage*: Same as #1 plus two additional options: -0 (standard filters: --min-mapping-quality 30 --min-base-quality 20 --min-supporting-allele-qsum 0 --genotype-variant-threshold 0) and --min-coverage.
462 -v --vcf FILE Output VCF-format results to FILE. 583 3. *Frequency-based pooled calling*: This is equivalent to using FreeBayes with the following options: --haplotype-length 0 --min-alternate-count 1 --min-alternate-fraction 0 --pooled-continuous --report-monomorphic. This is the best choice for calling variants in mixtures such as viral, bacterial, or organellar genomes.
463 -f --fasta-reference FILE 584 4. *Frequency-based pooled calling with filtering and coverage*: Same as #3 but adds -0 and --min-coverage like in #2.
464 Use FILE as the reference sequence for analysis. 585 5. *Complete list of all options*: Gives you full control by exposing all FreeBayes options as Galaxy widgets.
465 An index file (FILE.fai) will be created if none exists. 586
466 If neither --targets nor --region are specified, FreeBayes 587 -----
467 will analyze every position in this reference. 588
589 **FreeBayes options**
590
591 .. class:: infomark
592
593 Note that each Galaxy parameter widget corresponds to one of the command line flags listed below:
594
595 Input and output::
596
468 -t --targets FILE 597 -t --targets FILE
469 Limit analysis to targets listed in the BED-format FILE. 598 Limit analysis to targets listed in the BED-format FILE.
470 -r --region &lt;chrom&gt;:&lt;start_position&gt;..&lt;end_position&gt; 599 -r --region chrom:start_position-end_position
471 Limit analysis to the specified region, 0-base coordinates, 600 Limit analysis to the specified region, 0-base coordinates,
472 end_position not included (same as BED format). 601 end_position included. Either '-' or '..' may be used as a separator.
473 -s --samples FILE 602 -s --samples FILE
474 Limit analysis to samples listed (one per line) in the FILE. 603 Limit analysis to samples listed (one per line) in the FILE.
475 By default FreeBayes will analyze all samples in its input 604 By default FreeBayes will analyze all samples in its input
476 BAM files. 605 BAM files.
477 --populations FILE 606 --populations FILE
482 Read a copy number map from the BED file FILE, which has 611 Read a copy number map from the BED file FILE, which has
483 the format: 612 the format:
484 reference sequence, start, end, sample name, copy number 613 reference sequence, start, end, sample name, copy number
485 ... for each region in each sample which does not have the 614 ... for each region in each sample which does not have the
486 default copy number as set by --ploidy. 615 default copy number as set by --ploidy.
487 -L --trace FILE Output an algorithmic trace to FILE. 616 --trace FILE Output an algorithmic trace to FILE.
488 --failed-alleles FILE 617 --failed-alleles FILE
489 Write a BED file of the analyzed positions which do not 618 Write a BED file of the analyzed positions which do not
490 pass --pvar to FILE. 619 pass --pvar to FILE.
491 -@ --variant-input VCF 620 -@ --variant-input VCF
492 Use variants reported in VCF file as input to the algorithm. 621 Use variants reported in VCF file as input to the algorithm.
493 A report will be generated for every record in the VCF file. 622 Variants in this file will be treated as putative variants
623 even if there is not enough support in the data to pass
624 input filters.
494 -l --only-use-input-alleles 625 -l --only-use-input-alleles
495 Only provide variant calls and genotype likelihoods for sites 626 Only provide variant calls and genotype likelihoods for sites
496 and alleles which are provided in the VCF input, and provide 627 and alleles which are provided in the VCF input, and provide
497 output in the VCF for all input alleles, not just those which 628 output in the VCF for all input alleles, not just those which
498 have support in the data. 629 have support in the data.
499 --haplotype-basis-alleles VCF 630 --haplotype-basis-alleles VCF
500 When specified, only variant alleles provided in this input 631 When specified, only variant alleles provided in this input
501 VCF will be used for the construction of complex or haplotype 632 VCF will be used for the construction of complex or haplotype
502 alleles. 633 alleles.
503 634 --report-all-haplotype-alleles
504 reporting: 635 At sites where genotypes are made over haplotype alleles,
636 provide information about all alleles in output, not only
637 those which are called.
638 --report-monomorphic
639 Report even loci which appear to be monomorphic, and report all
640 considered alleles, even those which are not in called genotypes.
641 Loci which do not have any potential alternates have '.' for ALT.
642
643 Reporting::
505 644
506 -P --pvar N Report sites if the probability that there is a polymorphism 645 -P --pvar N Report sites if the probability that there is a polymorphism
507 at the site is greater than N. default: 0.0001 646 at the site is greater than N. default: 0.0. Note that post-
508 -_ --show-reference-repeats 647 filtering is generally recommended over the use of this parameter.
509 Calculate and show information about reference repeats in 648
510 the VCF output. 649 Population model::
511
512 population model:
513 650
514 -T --theta N The expected mutation rate or pairwise nucleotide diversity 651 -T --theta N The expected mutation rate or pairwise nucleotide diversity
515 among the population under analysis. This serves as the 652 among the population under analysis. This serves as the
516 single parameter to the Ewens Sampling Formula prior model 653 single parameter to the Ewens Sampling Formula prior model
517 default: 0.001 654 default: 0.001
518 -p --ploidy N Sets the default ploidy for the analysis to N. default: 2 655 -p --ploidy N Sets the default ploidy for the analysis to N. default: 2
519 -J --pooled Assume that samples result from pooled sequencing. 656 -J --pooled-discrete
657 Assume that samples result from pooled sequencing.
658 Model pooled samples using discrete genotypes across pools.
520 When using this flag, set --ploidy to the number of 659 When using this flag, set --ploidy to the number of
521 alleles in each sample. 660 alleles in each sample or use the --cnv-map to define
522 661 per-sample ploidy.
523 reference allele: 662 -K --pooled-continuous
663 Output all alleles which pass input filters, regardless of
664 genotyping outcome or model.
665
666 Reference allele::
524 667
525 -Z --use-reference-allele 668 -Z --use-reference-allele
526 This flag includes the reference allele in the analysis as 669 This flag includes the reference allele in the analysis as
527 if it is another sample from the same population. 670 if it is another sample from the same population.
528 -H --diploid-reference
529 If using the reference sequence as a sample (-Z),
530 treat it as diploid. default: false (reference is haploid)
531 --reference-quality MQ,BQ 671 --reference-quality MQ,BQ
532 Assign mapping quality of MQ to the reference allele at each 672 Assign mapping quality of MQ to the reference allele at each
533 site and base quality of BQ. default: 100,60 673 site and base quality of BQ. default: 100,60
534 674
535 allele scope: 675 Allele scope::
536 676
537 -I --no-snps Ignore SNP alleles. 677 -I --no-snps Ignore SNP alleles.
538 -i --no-indels Ignore insertion and deletion alleles. 678 -i --no-indels Ignore insertion and deletion alleles.
539 -X --no-mnps Ignore multi-nucleotide polymorphisms, MNPs. 679 -X --no-mnps Ignore multi-nucleotide polymorphisms, MNPs.
540 -u --no-complex Ignore complex events (composites of other classes). 680 -u --no-complex Ignore complex events (composites of other classes).
541 -n --use-best-n-alleles N 681 -n --use-best-n-alleles N
542 Evaluate only the best N SNP alleles, ranked by sum of 682 Evaluate only the best N SNP alleles, ranked by sum of
543 supporting quality scores. (Set to 0 to use all; default: all) 683 supporting quality scores. (Set to 0 to use all; default: all)
544 -E --max-complex-gap N 684 -E --max-complex-gap N
545 Allow complex alleles with contiguous embedded matches of up 685 --haplotype-length N
546 to this length. 686 Allow haplotype calls with contiguous embedded matches of up
547 687 to this length. (default: 3)
548 indel realignment: 688 --min-repeat-size N
549 689 When assembling observations across repeats, require the total repeat
550 -O --left-align-indels 690 length at least this many bp. (default: 5)
551 Left-realign and merge gaps embedded in reads. default: false 691 --min-repeat-entropy N
552 692 To detect interrupted repeats, build across sequence until it has
553 input filters: 693 entropy > N bits per bp. (default: 0, off)
694 --no-partial-observations
695 Exclude observations which do not fully span the dynamically-determined
696 detection window. (default, use all observations, dividing partial
697 support across matching haplotypes when generating haplotypes.)
698
699 Indel realignment::
700
701 -O --dont-left-align-indels
702 Turn off left-alignment of indels, which is enabled by default.
703
704 Input filters::
554 705
555 -4 --use-duplicate-reads 706 -4 --use-duplicate-reads
556 Include duplicate-marked alignments in the analysis. 707 Include duplicate-marked alignments in the analysis.
557 default: exclude duplicates 708 default: exclude duplicates marked as such in alignments
558 -m --min-mapping-quality Q 709 -m --min-mapping-quality Q
559 Exclude alignments from analysis if they have a mapping 710 Exclude alignments from analysis if they have a mapping
560 quality less than Q. default: 30 711 quality less than Q. default: 1
561 -q --min-base-quality Q 712 -q --min-base-quality Q
562 Exclude alleles from analysis if their supporting base 713 Exclude alleles from analysis if their supporting base
563 quality is less than Q. default: 20 714 quality is less than Q. default: 0
564 -R --min-supporting-quality MQ,BQ 715 -R --min-supporting-allele-qsum Q
565 In order to consider an alternate allele, at least one supporting 716 Consider any allele in which the sum of qualities of supporting
566 alignment must have mapping quality MQ, and one supporting 717 observations is at least Q. default: 0
567 allele must have base quality BQ. default: 0,0, unset 718 -Y --min-supporting-mapping-qsum Q
719 Consider any allele in which the sum of mapping qualities of
720 supporting reads is at least Q. default: 0
568 -Q --mismatch-base-quality-threshold Q 721 -Q --mismatch-base-quality-threshold Q
569 Count mismatches toward --read-mismatch-limit if the base 722 Count mismatches toward --read-mismatch-limit if the base
570 quality of the mismatch is &gt;= Q. default: 10 723 quality of the mismatch is >= Q. default: 10
571 -U --read-mismatch-limit N 724 -U --read-mismatch-limit N
572 Exclude reads with more than N mismatches where each mismatch 725 Exclude reads with more than N mismatches where each mismatch
573 has base quality &gt;= mismatch-base-quality-threshold. 726 has base quality >= mismatch-base-quality-threshold.
574 default: ~unbounded 727 default: ~unbounded
575 -z --read-max-mismatch-fraction N 728 -z --read-max-mismatch-fraction N
576 Exclude reads with more than N [0,1] fraction of mismatches where 729 Exclude reads with more than N [0,1] fraction of mismatches where
577 each mismatch has base quality &gt;= mismatch-base-quality-threshold 730 each mismatch has base quality >= mismatch-base-quality-threshold
578 default: 1.0 731 default: 1.0
579 -$ --read-snp-limit N 732 -$ --read-snp-limit N
580 Exclude reads with more than N base mismatches, ignoring gaps 733 Exclude reads with more than N base mismatches, ignoring gaps
581 with quality &gt;= mismatch-base-quality-threshold. 734 with quality >= mismatch-base-quality-threshold.
582 default: ~unbounded 735 default: ~unbounded
583 -e --read-indel-limit N 736 -e --read-indel-limit N
584 Exclude reads with more than N separate gaps. 737 Exclude reads with more than N separate gaps.
585 default: ~unbounded 738 default: ~unbounded
586 -0 --standard-filters Use stringent input base and mapping quality filters 739 -0 --standard-filters Use stringent input base and mapping quality filters
587 Equivalent to -m 30 -q 20 -R 0 -S 0 740 Equivalent to -m 30 -q 20 -R 0 -S 0
588 -x --indel-exclusion-window
589 Ignore portions of alignments this many bases from a
590 putative insertion or deletion allele. default: 0
591 -F --min-alternate-fraction N 741 -F --min-alternate-fraction N
592 Require at least this fraction of observations supporting 742 Require at least this fraction of observations supporting
593 an alternate allele within a single individual 743 an alternate allele within a single individual
594 in order to evaluate the position. default: 0.0 744 in order to evaluate the position. default: 0.2
595 -C --min-alternate-count N 745 -C --min-alternate-count N
596 Require at least this count of observations supporting 746 Require at least this count of observations supporting
597 an alternate allele within a single individual in order 747 an alternate allele within a single individual in order
598 to evaluate the position. default: 1 748 to evaluate the position. default: 2
599 -3 --min-alternate-qsum N 749 -3 --min-alternate-qsum N
600 Require at least this sum of quality of observations supporting 750 Require at least this sum of quality of observations supporting
601 an alternate allele within a single individual in order 751 an alternate allele within a single individual in order
602 to evaluate the position. default: 0 752 to evaluate the position. default: 0
603 -G --min-alternate-total N 753 -G --min-alternate-total N
605 an alternate allele within the total population in order 755 an alternate allele within the total population in order
606 to use the allele in analysis. default: 1 756 to use the allele in analysis. default: 1
607 -! --min-coverage N 757 -! --min-coverage N
608 Require at least this coverage to process a site. default: 0 758 Require at least this coverage to process a site. default: 0
609 759
610 bayesian priors: 760 Population priors::
611 761
612 -Y --no-ewens-priors
613 Turns off the Ewens' Sampling Formula component of the priors.
614 -k --no-population-priors 762 -k --no-population-priors
615 Equivalent to --pooled --no-ewens-priors 763 Equivalent to --pooled-discrete --hwe-priors-off and removal of
616 -w --hwe-priors Use the probability of the combination arising under HWE given 764 Ewens Sampling Formula component of priors.
617 the allele frequency as estimated by observation frequency. 765
618 766 Mappability priors::
619 observation prior expectations: 767
620 768 -w --hwe-priors-off
621 -V --binomial-obs-priors 769 Disable estimation of the probability of the combination
622 Incorporate expectations about osbervations into the priors, 770 arising under HWE given the allele frequency as estimated
771 by observation frequency.
772 -V --binomial-obs-priors-off
773 Disable incorporation of prior expectations about observations.
623 Uses read placement probability, strand balance probability, 774 Uses read placement probability, strand balance probability,
624 and read position (5'-3') probability. 775 and read position (5'-3') probability.
625 -a --allele-balance-priors 776 -a --allele-balance-priors-off
626 Use aggregate probability of observation balance between alleles 777 Disable use of aggregate probability of observation balance between alleles
627 as a component of the priors. Best for observations with minimal 778 as a component of the priors.
628 inherent reference bias. 779
629 780 Genotype likelihoods::
630 algorithmic features: 781
631 782 --observation-bias FILE
632 -M --site-selection-max-iterations N 783 Read length-dependent allele observation biases from FILE.
633 Uses hill-climbing algorithm to search posterior space for N 784 The format is [length] [alignment efficiency relative to reference]
634 iterations to determine if the site should be evaluated. Set to 0 785 where the efficiency is 1 if there is no relative observation bias.
635 to prevent use of this algorithm for site selection, and 786 --base-quality-cap Q
636 to a low integer for improvide site selection at a slight 787 Limit estimated observation quality by capping base quality at Q.
637 performance penalty. default: 5. 788 --experimental-gls
789 Generate genotype likelihoods using 'effective base depth' metric
790 qual = 1-BaseQual * 1-MapQual. Incorporate partial observations.
791 This is the default when contamination estimates are provided.
792 Optimized for diploid samples.
793 --prob-contamination F
794 An estimate of contamination to use for all samples. default: 10e-9
795 --contamination-estimates FILE
796 A file containing per-sample estimates of contamination, such as
797 those generated by VerifyBamID. The format should be:
798 sample p(read=R|genotype=AR) p(read=A|genotype=AA)
799 Sample '*' can be used to set default contamination estimates.
800
801 Algorithmic features::
802
803 --report-genotype-likelihood-max
804 Report genotypes using the maximum-likelihood estimate provided
805 from genotype likelihoods.
638 -B --genotyping-max-iterations N 806 -B --genotyping-max-iterations N
639 Iterate no more than N times during genotyping step. default: 25. 807 Iterate no more than N times during genotyping step. default: 1000.
640 --genotyping-max-banddepth N 808 --genotyping-max-banddepth N
641 Integrate no deeper than the Nth best genotype by likelihood when 809 Integrate no deeper than the Nth best genotype by likelihood when
642 genotyping. default: 6. 810 genotyping. default: 6.
643 -W --posterior-integration-limits N,M 811 -W --posterior-integration-limits N,M
644 Integrate all genotype combinations in our posterior space 812 Integrate all genotype combinations in our posterior space
645 which include no more than N samples with their Mth best 813 which include no more than N samples with their Mth best
646 data likelihood. default: 1,3. 814 data likelihood. default: 1,3.
647 -K --no-permute
648 Do not scale prior probability of genotype combination given allele
649 frequency by the number of permutations of included genotypes.
650 -N --exclude-unobserved-genotypes 815 -N --exclude-unobserved-genotypes
651 Skip sample genotypings for which the sample has no supporting reads. 816 Skip sample genotypings for which the sample has no supporting reads.
652 -S --genotype-variant-threshold N 817 -S --genotype-variant-threshold N
653 Limit posterior integration to samples where the second-best 818 Limit posterior integration to samples where the second-best
654 genotype likelihood is no more than log(N) from the highest 819 genotype likelihood is no more than log(N) from the highest
655 genotype likelihood for the sample. default: ~unbounded 820 genotype likelihood for the sample. default: ~unbounded
656 -j --use-mapping-quality 821 -j --use-mapping-quality
657 Use mapping quality of alleles when calculating data likelihoods. 822 Use mapping quality of alleles when calculating data likelihoods.
823 -H --harmonic-indel-quality
824 Use a weighted sum of base qualities around an indel, scaled by the
825 distance from the indel. By default use a minimum BQ in flanking sequence.
658 -D --read-dependence-factor N 826 -D --read-dependence-factor N
659 Incorporate non-independence of reads by scaling successive 827 Incorporate non-independence of reads by scaling successive
660 observations by this factor during data likelihood 828 observations by this factor during data likelihood
661 calculations. default: 0.9 829 calculations. default: 0.9
662 -= --no-marginals 830 -= --genotype-qualities
663 Do not calculate the marginal probability of genotypes. Saves 831 Calculate the marginal probability of genotypes and report as GQ in
664 time and improves scaling performance in large populations. 832 each sample field in the VCF output.
665 833
666 834
667 ------ 835 ------
668 836
669 **Citation** 837 **Citation**
670 838
671 For the underlying tool, please cite `Erik Garrison and Gabor Marth. Haplotype-based variant detection from short-read sequencing &lt;http://arxiv.org/abs/1207.3907&gt;`_. 839 For the underlying tool, please cite `Erik Garrison and Gabor Marth. Haplotype-based variant detection from short-read sequencing &lt;http://arxiv.org/abs/1207.3907&gt;`_.
672 840
673 If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* 841 The initial version of the wrapper was produced by Dan Blankenberg and upgraded by Anton Nekrutenko.
674 842
675 </help> 843 </help>
844
845 <citations>
846 <citation type="bibtex">@misc{1207.3907,
847 Author = {Erik Garrison},
848 Title = {Haplotype-based variant detection from short-read sequencing},
849 Year = {2012},
850 Eprint = {arXiv:1207.3907},
851 url = {http://arxiv.org/abs/1207.3907},
852 }</citation>
853 </citations>
676 </tool> 854 </tool>