Mercurial > repos > devteam > freebayes
diff freebayes.xml @ 11:f14331392369 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/freebayes commit 24f33bda62a7b6771ad42a39fe8c683e09f6d8b8"
author | iuc |
---|---|
date | Wed, 06 Nov 2019 17:02:36 -0500 |
parents | 2fb16f415220 |
children | 2667d6a97b50 |
line wrap: on
line diff
--- a/freebayes.xml Wed Oct 25 08:29:29 2017 -0400 +++ b/freebayes.xml Wed Nov 06 17:02:36 2019 -0500 @@ -1,4 +1,4 @@ -<tool id="freebayes" name="FreeBayes" version="@DEPENDENCY_VERSION@-0"> +<tool id="freebayes" name="FreeBayes" version="@DEPENDENCY_VERSION@"> <description>bayesian genetic variant detector</description> <macros> <import>macros.xml</import> @@ -77,26 +77,29 @@ ## Outputs --vcf './vcf_output/part_\$i.vcf' + ## Coverage + #if str($coverage_options.coverage_options_selector) == "set": + @COVERAGE@ + #end if + ##advanced options #if str( $options_type.options_type_selector ) == "simple": #pass #elif str( $options_type.options_type_selector ) == "simple_w_filters": --standard-filters - --min-coverage ${options_type.min_coverage} #elif str( $options_type.options_type_selector ) == "naive": --haplotype-length 0 --min-alternate-count 1 - --min-alternate-fraction 0 + --min-alternate-fraction 0.05 --pooled-continuous --report-monomorphic #elif str( $options_type.options_type_selector ) == "naive_w_filters": --haplotype-length 0 --min-alternate-count 1 - --min-alternate-fraction 0 + --min-alternate-fraction 0.05 --pooled-continuous --report-monomorphic --standard-filters - --min-coverage ${options_type.min_coverage} #elif str( $options_type.options_type_selector ) == "full": #if str( $options_type.optional_inputs.optional_inputs_selector ) == 'set': ${options_type.optional_inputs.report_monomorphic} @@ -189,7 +192,6 @@ --read-snp-limit ${options_type.input_filters.mismatch_filters.read_snp_limit} #end if - --min-coverage ${options_type.input_filters.min_coverage} --min-alternate-qsum ${options_type.input_filters.min_alternate_qsum} #end if @@ -305,6 +307,17 @@ <param name="region_end" type="integer" label="Region End" value="" /> </when> </conditional> + <conditional name="coverage_options"> + <param name="coverage_options_selector" type="select" label="Read coverage" + help="Sets --min-coverage, --limit-coverage, and --skip-coverage"> + <option 
value="do_not_set" selected="true">Use defaults</option> + <option value="set">Specify coverage options</option> + </param> + <when value="set"> + <expand macro="par_min_cov" /> + </when> + <when value="do_not_set" /> + </conditional> <conditional name="options_type"> <param name="options_type_selector" type="select" label="Choose parameter selection level" help="Select how much control over the freebayes run you need"> @@ -315,6 +328,7 @@ <option value="full">5. Full list of options</option> </param> <when value="full"> + <conditional name="optional_inputs"> <param name="optional_inputs_selector" type="select" label="Additional inputs" help="Sets --samples, --populations, --cnv-map, --trace, --failed-alleles, --variant-input, --only-use-input-alleles, --haplotype-basis-alleles, --report-all-haplotype-alleles, --report-monomorphic options, --observation-bias, and --contamination-estimates"> @@ -439,7 +453,7 @@ label="Allow haplotype calls with contiguous embedded matches of up to (nucleotides)" /> <param name="min_repeat_length" argument="--min-repeat-size" type="integer" value="5" label="When assembling observations across repeats, require the total repeat length at least this many bp" /> - <param name="min_repeat_entropy" argument="--min-repeat-entropy" type="integer" value="0" + <param name="min_repeat_entropy" argument="--min-repeat-entropy" type="integer" value="1" label="To detect interrupted repeats, build across sequence until it has entropy > (bits per bp)" /> <param name="no_partial_observations" argument="--no-partial-observations" type="boolean" truevalue="--no-partial-observations" falsevalue="" checked="false" label="Exclude observations which do not fully span the dynamically-determined detection window" @@ -496,7 +510,7 @@ <param name="standard_filters" argument="--standard-filters" type="boolean" truevalue="-0" falsevalue="" checked="false" label="Use stringent input base and mapping quality filters" help="Equivalent to -m 30 -q 20 -R 0 -S 0" /> - 
<param name="F" argument="--min-alternate-fraction" type="float" value="0.2" + <param name="F" argument="--min-alternate-fraction" type="float" value="0.05" label="Require at least this fraction of observations supporting an alternate allele within a single individual in the in order to evaluate the position" /> <param name="C" argument="--min-alternate-count" type="integer" value="2" label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" /> @@ -504,7 +518,6 @@ label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" /> <param name="G" argument="--min-alternate-total" type="integer" value="1" label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" /> - <expand macro="par_min_cov" /> </when> <when value="do_not_set" /> </conditional> @@ -585,17 +598,9 @@ </conditional> </when> <when value="simple" /> - <when value="simple_w_filters"> - <!-- add standard-filters to command line --> - <expand macro="par_min_cov" /> - </when> - <when value="naive"> - <!-- build command line using haplotype-length 0 min-alternate-count 1 min-alternate-fraction 0 pooled-continuous report-monomorphic --> - </when> - <when value="naive_w_filters"> - <!-- build command line using haplotype-length 0 min-alternate-count 1 min-alternate-fraction 0 pooled-continuous report-monomorphic standard-filters--> - <expand macro="par_min_cov" /> - </when> + <when value="simple_w_filters" /> + <when value="naive" /> + <when value="naive_w_filters" /> </conditional> </inputs> <outputs> @@ -622,6 +627,7 @@ <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/> <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/> <param name="options_type_selector" value="naive_w_filters"/> + <param 
name="coverage_options_selector" value="set" /> <param name="min_coverage" value="14"/> <output name="output_vcf" file="freebayes-phix174-test2.vcf" lines_diff="4" /> </test> @@ -631,6 +637,7 @@ <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/> <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/> <param name="options_type_selector" value="naive_w_filters"/> + <param name="coverage_options_selector" value="set" /> <param name="min_coverage" value="14"/> <output name="output_vcf" file="freebayes-phix174-test3.vcf" lines_diff="4" /> </test> @@ -644,8 +651,38 @@ <param name="P" value="1"/> <output name="output_vcf" file="freebayes-phix174-test4.vcf" lines_diff="4" /> </test> + <test> + <param name="reference_source_selector" value="history" /> + <param name="processmode" value="individual" /> + <param name="ref_file" ftype="fasta" value="freebayes-hxb2.fasta"/> + <param name="input_bams" ftype="bam" value="freebayes-hxb2.bam"/> + <param name="options_type_selector" value="simple"/> + <param name="coverage_options_selector" value="set" /> + <param name="min_coverage" value="250" /> + <output name="output_vcf" file="freebayes-hxb2-test5.vcf" lines_diff="4" /> + </test> + <test> + <param name="reference_source_selector" value="history" /> + <param name="processmode" value="individual" /> + <param name="ref_file" ftype="fasta" value="freebayes-hxb2.fasta"/> + <param name="input_bams" ftype="bam" value="freebayes-hxb2.bam"/> + <param name="options_type_selector" value="simple"/> + <param name="coverage_options_selector" value="set" /> + <param name="limit_coverage" value="400" /> + <output name="output_vcf" file="freebayes-hxb2-test6.vcf" lines_diff="4" /> + </test> + <test> + <param name="reference_source_selector" value="history" /> + <param name="processmode" value="individual" /> + <param name="ref_file" ftype="fasta" value="freebayes-hxb2.fasta"/> + <param name="input_bams" ftype="bam" value="freebayes-hxb2.bam"/> + <param 
name="options_type_selector" value="simple"/> + <param name="coverage_options_selector" value="set" /> + <param name="skip_coverage" value="100" /> + <output name="output_vcf" file="freebayes-hxb2-test7.vcf" lines_diff="4" /> + </test> </tests> - <help> + <help><![CDATA[ **What it does** FreeBayes is a Bayesian genetic variant detector designed to find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms), indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and complex events (composite insertion and substitution events) smaller than the length of a short-read sequencing alignment. @@ -680,10 +717,212 @@ ------ +**Command-line parameters** + +**Input**:: + + --bam FILE The file or set of BAM files to be analyzed. + --bam-list FILE A file containing a list of BAM files to be analyzed. + + --stdin Read BAM input on stdin. + --fasta-reference FILE Use FILE as the reference sequence for analysis. + An index file (FILE.fai) will be created if none exists. + If neither --targets nor --region are specified, FreeBayes + will analyze every position in this reference. + --targets FILE Limit analysis to targets listed in the BED-format FILE. + --region <chrom>:<start>-<end> Limit analysis to the specified region, 0-base coordinates, + end_position not included (same as BED format). + Either '-' or '..' maybe used as a separator. + --samples FILE Limit analysis to samples listed (one per line) in the FILE. + By default FreeBayes will analyze all samples in its input + BAM files. + --populations FILE Each line of FILE should list a sample and a population which + it is part of. The population-based bayesian inference model + will then be partitioned on the basis of the populations. + --cnv-map FILE Read a copy number map from the BED file FILE, which has + either a sample-level ploidy: + sample_name copy_number + or a region-specific format: + seq_name start end sample_name copy_number + ... 
for each region in each sample which does not have the + default copy number as set by --ploidy. These fields can be delimited + by space or tab. + +**Output**:: + + --vcf FILE Output VCF-format results to FILE. (default: stdout) + --gvcf Write gVCF output, which indicates coverage in uncalled regions. + --gvcf-chunk NUM When writing gVCF output emit a record for every NUM bases. + --gvcf-dont-use-chunk When writing the gVCF output emit a record for all bases if + set to "true" , will also route an int to --gvcf-chunk + similar to --output-mode EMIT_ALL_SITES from GATK + --variant-input VCF Use variants reported in VCF file as input to the algorithm. + Variants in this file will be included in the output even if + there is not enough support in the data to pass input filters. + --only-use-input-alleles Only provide variant calls and genotype likelihoods for sites + and alleles which are provided in the VCF input, and provide + output in the VCF for all input alleles, not just those which + have support in the data. + --haplotype-basis-alleles VCF When specified, only variant alleles provided in this input + VCF will be used for the construction of complex or haplotype + alleles. + --report-all-haplotype-alleles At sites where genotypes are made over haplotype alleles, + provide information about all alleles in output, not only + those which are called. + --report-monomorphic Report even loci which appear to be monomorphic, and report all + considered alleles, even those which are not in called genotypes. + Loci which do not have any potential alternates have '.' for ALT. + --pvar N Report sites if the probability that there is a polymorphism + at the site is greater than N. default: 0.0. Note that post- + filtering is generally recommended over the use of this parameter. 
+ --strict-vcf Generate strict VCF format (FORMAT/GQ will be an int) + +**Population model**:: + + --theta N The expected mutation rate or pairwise nucleotide diversity + among the population under analysis. This serves as the + single parameter to the Ewens Sampling Formula prior model + default: 0.001 + --ploidy N Sets the default ploidy for the analysis to N. default: 2 + --pooled-discrete Assume that samples result from pooled sequencing. + Model pooled samples using discrete genotypes across pools. + When using this flag, set --ploidy to the number of + alleles in each sample or use the --cnv-map to define + per-sample ploidy. + --pooled-continuous Output all alleles which pass input filters, regardless of + genotyping outcome or model. + +**Reference allele**:: + + --use-reference-allele This flag includes the reference allele in the analysis as + if it is another sample from the same population. + --reference-quality MQ,BQ Assign mapping quality of MQ to the reference allele at each + site and base quality of BQ. default: 100,60 + +**Allele scope**:: + + --use-best-n-alleles N Evaluate only the best N SNP alleles, ranked by sum of + supporting quality scores. (Set to 0 to use all; default: all) + --max-complex-gap + --haplotype-length N Allow haplotype calls with contiguous embedded matches of up + to this length. Set N=-1 to disable clumping. (default: 3) + --min-repeat-size When assembling observations across repeats, require the total repeat + length at least this many bp. (default: 5) + --min-repeat-entropy N To detect interrupted repeats, build across sequence until it has + entropy > N bits per bp. Set to 0 to turn off. (default: 1) + --no-partial-observations Exclude observations which do not fully span the dynamically-determined + detection window. (default, use all observations, dividing partial + support across matching haplotypes when generating haplotypes.) 
+ +**Indel realignment**:: + + --dont-left-align-indels Turn off left-alignment of indels, which is enabled by default. + +**Input filters**:: + + --use-duplicate-reads Include duplicate-marked alignments in the analysis. + default: exclude duplicates marked as such in alignments + --min-mapping-quality Q Exclude alignments from analysis if they have a mapping + quality less than Q. default: 1 + --min-base-quality Q Exclude alleles from analysis if their supporting base + quality is less than Q. default: 0 + --min-supporting-allele-qsum Q Consider any allele in which the sum of qualities of supporting + observations is at least Q. default: 0 + --min-supporting-mapping-qsum Q Consider any allele in which the sum of mapping qualities of + supporting reads is at least Q. default: 0 + --mismatch-base-quality-threshold Q Count mismatches toward --read-mismatch-limit if the base + quality of the mismatch is >= Q. default: 10 + --read-mismatch-limit N Exclude reads with more than N mismatches where each mismatch + has base quality >= mismatch-base-quality-threshold. + default: ~unbounded + --read-max-mismatch-fraction N Exclude reads with more than N [0,1] fraction of mismatches where + each mismatch has base quality >= mismatch-base-quality-threshold + default: 1.0 + --read-snp-limit N Exclude reads with more than N base mismatches, ignoring gaps + with quality >= mismatch-base-quality-threshold. + default: ~unbounded + --read-indel-limit N Exclude reads with more than N separate gaps. + default: ~unbounded + --standard-filters Use stringent input base and mapping quality filters + Equivalent to -m 30 -q 20 -R 0 -S 0 + --min-alternate-fraction N Require at least this fraction of observations supporting + an alternate allele within a single individual + in order to evaluate the position. 
default: 0.05 + --min-alternate-count N Require at least this count of observations supporting + an alternate allele within a single individual in order + to evaluate the position. default: 2 + --min-alternate-qsum N Require at least this sum of quality of observations supporting + an alternate allele within a single individual in order + to evaluate the position. default: 0 + --min-alternate-total N Require at least this count of observations supporting + an alternate allele within the total population in order + to use the allele in analysis. default: 1 + --min-coverage N Require at least this coverage to process a site. default: 0 + --limit-coverage N Downsample per-sample coverage to this level if greater than this coverage. + default: no limit + --skip-coverage N Skip processing of alignments overlapping positions with coverage >N. + This filters sites above this coverage, but will also reduce data nearby. + default: no limit + +**Population priors**:: + + --no-population-priors Equivalent to --pooled-discrete --hwe-priors-off and removal of + Ewens Sampling Formula component of priors. + +**Mappability priors**:: + + --hwe-priors-off Disable estimation of the probability of the combination + arising under HWE given the allele frequency as estimated + by observation frequency. + --binomial-obs-priors-off Disable incorporation of prior expectations about observations. + Uses read placement probability, strand balance probability, + and read position (5'-3') probability. + --allele-balance-priors-off Disable use of aggregate probability of observation balance between alleles + as a component of the priors. + +**Genotype likelihoods**:: + + --observation-bias FILE Read length-dependent allele observation biases from FILE. + The format is [length] [alignment efficiency relative to reference] + where the efficiency is 1 if there is no relative observation bias. + --base-quality-cap Q Limit estimated observation quality by capping base quality at Q. 
+ --prob-contamination F An estimate of contamination to use for all samples. default: 10e-9 + --legacy-gls Use legacy (polybayes equivalent) genotype likelihood calculations + --contamination-estimates FILE A file containing per-sample estimates of contamination, such as + those generated by VerifyBamID. The format should be: + sample p(read=R|genotype=AR) p(read=A|genotype=AA) + Sample '*' can be used to set default contamination estimates. + +**Algorithmic features**:: + + --report-genotype-likelihood-max Report genotypes using the maximum-likelihood estimate provided + from genotype likelihoods. + --genotyping-max-iterations N Iterate no more than N times during genotyping step. default: 1000. + --genotyping-max-banddepth N Integrate no deeper than the Nth best genotype by likelihood when + genotyping. default: 6. + --posterior-integration-limits N,M Integrate all genotype combinations in our posterior space + which include no more than N samples with their Mth best + data likelihood. default: 1,3. + --exclude-unobserved-genotypes Skip sample genotypings for which the sample has no supporting reads. + --genotype-variant-threshold N Limit posterior integration to samples where the second-best + genotype likelihood is no more than log(N) from the highest + genotype likelihood for the sample. default: ~unbounded + --use-mapping-quality Use mapping quality of alleles when calculating data likelihoods. + --harmonic-indel-quality Use a weighted sum of base qualities around an indel, scaled by the + distance from the indel. By default use a minimum BQ in flanking sequence. + --read-dependence-factor N Incorporate non-independence of reads by scaling successive + observations by this factor during data likelihood + calculations. default: 0.9 + --genotype-qualities Calculate the marginal probability of genotypes and report as GQ in + each sample field in the VCF output. 
+ +------ + **Acknowledgments** The initial version of the wrapper was produced by Dan Blankenberg and upgraded by Anton Nekrutenko. TNG was developed by Bjoern Gruening. +]]> </help> <expand macro="citations"> <citation type="bibtex">