diff freebayes.xml @ 11:f14331392369 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/freebayes commit 24f33bda62a7b6771ad42a39fe8c683e09f6d8b8"
author iuc
date Wed, 06 Nov 2019 17:02:36 -0500
parents 2fb16f415220
children 2667d6a97b50
line wrap: on
line diff
--- a/freebayes.xml	Wed Oct 25 08:29:29 2017 -0400
+++ b/freebayes.xml	Wed Nov 06 17:02:36 2019 -0500
@@ -1,4 +1,4 @@
-<tool id="freebayes" name="FreeBayes" version="@DEPENDENCY_VERSION@-0">
+<tool id="freebayes" name="FreeBayes" version="@DEPENDENCY_VERSION@">
     <description>bayesian genetic variant detector</description>
     <macros>
         <import>macros.xml</import>
@@ -77,26 +77,29 @@
         ## Outputs
         --vcf './vcf_output/part_\$i.vcf'
 
+        ## Coverage
+        #if str($coverage_options.coverage_options_selector) == "set":
+            @COVERAGE@
+        #end if
+
         ##advanced options
         #if str( $options_type.options_type_selector ) == "simple":
             #pass
         #elif str( $options_type.options_type_selector ) == "simple_w_filters":
             --standard-filters
-            --min-coverage ${options_type.min_coverage}
         #elif str( $options_type.options_type_selector ) == "naive":
             --haplotype-length 0
             --min-alternate-count 1
-            --min-alternate-fraction 0
+            --min-alternate-fraction 0.05
             --pooled-continuous
             --report-monomorphic
         #elif str( $options_type.options_type_selector ) == "naive_w_filters":
             --haplotype-length 0
             --min-alternate-count 1
-            --min-alternate-fraction 0
+            --min-alternate-fraction 0.05
             --pooled-continuous
             --report-monomorphic
             --standard-filters
-            --min-coverage ${options_type.min_coverage}
         #elif str( $options_type.options_type_selector ) == "full":
             #if str( $options_type.optional_inputs.optional_inputs_selector ) == 'set':
                 ${options_type.optional_inputs.report_monomorphic}
@@ -189,7 +192,6 @@
                   --read-snp-limit ${options_type.input_filters.mismatch_filters.read_snp_limit}
                 #end if
 
-                --min-coverage ${options_type.input_filters.min_coverage}
                 --min-alternate-qsum ${options_type.input_filters.min_alternate_qsum}
             #end if
 
@@ -305,6 +307,17 @@
                 <param name="region_end" type="integer" label="Region End" value="" />
             </when>
         </conditional>
+        <conditional name="coverage_options">
+            <param name="coverage_options_selector" type="select" label="Read coverage"
+                   help="Sets --min-coverage, --limit-coverage, and --skip-coverage">
+                <option value="do_not_set" selected="true">Use defaults</option>
+                <option value="set">Specify coverage options</option>
+            </param>
+            <when value="set">
+                <expand macro="par_min_cov" />
+            </when>
+            <when value="do_not_set" />
+        </conditional>
         <conditional name="options_type">
             <param name="options_type_selector" type="select" label="Choose parameter selection level"
                    help="Select how much control over the freebayes run you need">
@@ -315,6 +328,7 @@
                 <option value="full">5. Full list of options</option>
             </param>
             <when value="full">
+
                 <conditional name="optional_inputs">
                     <param name="optional_inputs_selector" type="select" label="Additional inputs"
                            help="Sets --samples, --populations, --cnv-map, --trace, --failed-alleles, --variant-input, --only-use-input-alleles, --haplotype-basis-alleles, --report-all-haplotype-alleles, --report-monomorphic options, --observation-bias, and --contamination-estimates">
@@ -439,7 +453,7 @@
                                label="Allow haplotype calls with contiguous embedded matches of up to (nucleotides)" />
                         <param name="min_repeat_length" argument="--min-repeat-size" type="integer" value="5"
                                label="When assembling observations across repeats, require the total repeat length at least this many bp" />
-                        <param name="min_repeat_entropy" argument="--min-repeat-entropy" type="integer" value="0"
+                        <param name="min_repeat_entropy" argument="--min-repeat-entropy" type="integer" value="1"
                                label="To detect interrupted repeats, build across sequence until it has entropy > (bits per bp)" />
                         <param name="no_partial_observations" argument="--no-partial-observations" type="boolean" truevalue="--no-partial-observations" falsevalue="" checked="false"
                                label="Exclude observations which do not fully span the dynamically-determined detection window"
@@ -496,7 +510,7 @@
                         <param name="standard_filters" argument="--standard-filters" type="boolean" truevalue="-0" falsevalue="" checked="false"
                                label="Use stringent input base and mapping quality filters"
                                help="Equivalent to -m 30 -q 20 -R 0 -S 0" />
-                        <param name="F" argument="--min-alternate-fraction" type="float" value="0.2"
+                        <param name="F" argument="--min-alternate-fraction" type="float" value="0.05"
                                label="Require at least this fraction of observations supporting an alternate allele within a single individual in the in order to evaluate the position" />
                         <param name="C" argument="--min-alternate-count" type="integer" value="2"
                                label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" />
@@ -504,7 +518,6 @@
                                label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" />
                         <param name="G" argument="--min-alternate-total" type="integer" value="1"
                                label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" />
-                        <expand macro="par_min_cov" />
                     </when>
                     <when value="do_not_set" />
                 </conditional>
@@ -585,17 +598,9 @@
                 </conditional>
             </when>
             <when value="simple" />
-            <when value="simple_w_filters">
-                <!-- add standard-filters to command line -->
-                <expand macro="par_min_cov" />
-            </when>
-            <when value="naive">
-                <!-- build command line using haplotype-length 0 min-alternate-count 1 min-alternate-fraction 0 pooled-continuous report-monomorphic -->
-            </when>
-            <when value="naive_w_filters">
-                <!-- build command line using haplotype-length 0 min-alternate-count 1 min-alternate-fraction 0 pooled-continuous report-monomorphic standard-filters-->
-                <expand macro="par_min_cov" />
-            </when>
+            <when value="simple_w_filters" />
+            <when value="naive" />
+            <when value="naive_w_filters" />
         </conditional>
     </inputs>
     <outputs>
@@ -622,6 +627,7 @@
             <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>
             <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/>
             <param name="options_type_selector" value="naive_w_filters"/>
+            <param name="coverage_options_selector" value="set" />
             <param name="min_coverage" value="14"/>
             <output name="output_vcf" file="freebayes-phix174-test2.vcf" lines_diff="4" />
         </test>
@@ -631,6 +637,7 @@
             <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>
             <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/>
             <param name="options_type_selector" value="naive_w_filters"/>
+            <param name="coverage_options_selector" value="set" />
             <param name="min_coverage" value="14"/>
             <output name="output_vcf" file="freebayes-phix174-test3.vcf" lines_diff="4" />
         </test>
@@ -644,8 +651,38 @@
             <param name="P" value="1"/>
             <output name="output_vcf" file="freebayes-phix174-test4.vcf" lines_diff="4" />
         </test>
+        <test>
+            <param name="reference_source_selector" value="history" />
+            <param name="processmode" value="individual" />
+            <param name="ref_file" ftype="fasta" value="freebayes-hxb2.fasta"/>
+            <param name="input_bams" ftype="bam" value="freebayes-hxb2.bam"/>
+            <param name="options_type_selector" value="simple"/>
+            <param name="coverage_options_selector" value="set" />
+            <param name="min_coverage" value="250" />
+            <output name="output_vcf" file="freebayes-hxb2-test5.vcf" lines_diff="4" />
+        </test>
+        <test>
+            <param name="reference_source_selector" value="history" />
+            <param name="processmode" value="individual" />
+            <param name="ref_file" ftype="fasta" value="freebayes-hxb2.fasta"/>
+            <param name="input_bams" ftype="bam" value="freebayes-hxb2.bam"/>
+            <param name="options_type_selector" value="simple"/>
+            <param name="coverage_options_selector" value="set" />
+            <param name="limit_coverage" value="400" />
+            <output name="output_vcf" file="freebayes-hxb2-test6.vcf" lines_diff="4" />
+        </test>
+        <test>
+            <param name="reference_source_selector" value="history" />
+            <param name="processmode" value="individual" />
+            <param name="ref_file" ftype="fasta" value="freebayes-hxb2.fasta"/>
+            <param name="input_bams" ftype="bam" value="freebayes-hxb2.bam"/>
+            <param name="options_type_selector" value="simple"/>
+            <param name="coverage_options_selector" value="set" />
+            <param name="skip_coverage" value="100" />
+            <output name="output_vcf" file="freebayes-hxb2-test7.vcf" lines_diff="4" />
+        </test>
     </tests>
-    <help>
+    <help><![CDATA[
 **What it does**
 
 FreeBayes is a Bayesian genetic variant detector designed to find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms), indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and complex events (composite insertion and substitution events) smaller than the length of a short-read sequencing alignment.
@@ -680,10 +717,212 @@
 
 ------
 
+**Command-line parameters**
+
+**Input**::
+
+    --bam FILE                          The file or set of BAM files to be analyzed.
+    --bam-list FILE                     A file containing a list of BAM files to be analyzed.
+
+    --stdin                             Read BAM input on stdin.
+    --fasta-reference FILE              Use FILE as the reference sequence for analysis.
+                                        An index file (FILE.fai) will be created if none exists.
+                                        If neither --targets nor --region are specified, FreeBayes
+                                        will analyze every position in this reference.
+    --targets FILE                      Limit analysis to targets listed in the BED-format FILE.
+    --region <chrom>:<start>-<end>      Limit analysis to the specified region, 0-base coordinates,
+                                        end_position not included (same as BED format).
+                                        Either '-' or '..' may be used as a separator.
+    --samples FILE                      Limit analysis to samples listed (one per line) in the FILE.
+                                        By default FreeBayes will analyze all samples in its input
+                                        BAM files.
+    --populations FILE                  Each line of FILE should list a sample and a population which
+                                        it is part of.  The population-based bayesian inference model
+                                        will then be partitioned on the basis of the populations.
+    --cnv-map FILE                      Read a copy number map from the BED file FILE, which has
+                                        either a sample-level ploidy:
+                                        sample_name copy_number
+                                        or a region-specific format:
+                                        seq_name start end sample_name copy_number
+                                        ... for each region in each sample which does not have the
+                                        default copy number as set by --ploidy. These fields can be delimited
+                                        by space or tab.
+
+**Output**::
+
+    --vcf FILE                          Output VCF-format results to FILE. (default: stdout)
+    --gvcf                              Write gVCF output, which indicates coverage in uncalled regions.
+    --gvcf-chunk NUM                    When writing gVCF output emit a record for every NUM bases.
+    --gvcf-dont-use-chunk               When writing the gVCF output emit a record for all bases if
+                                        set to "true" , will also route an int to --gvcf-chunk
+                                        similar to --output-mode EMIT_ALL_SITES from GATK
+    --variant-input VCF                 Use variants reported in VCF file as input to the algorithm.
+                                        Variants in this file will be included in the output even if
+                                        there is not enough support in the data to pass input filters.
+    --only-use-input-alleles            Only provide variant calls and genotype likelihoods for sites
+                                        and alleles which are provided in the VCF input, and provide
+                                        output in the VCF for all input alleles, not just those which
+                                        have support in the data.
+    --haplotype-basis-alleles VCF       When specified, only variant alleles provided in this input
+                                        VCF will be used for the construction of complex or haplotype
+                                        alleles.
+    --report-all-haplotype-alleles      At sites where genotypes are made over haplotype alleles,
+                                        provide information about all alleles in output, not only
+                                        those which are called.
+    --report-monomorphic                Report even loci which appear to be monomorphic, and report all
+                                        considered alleles, even those which are not in called genotypes.
+                                        Loci which do not have any potential alternates have '.' for ALT.
+    --pvar N                            Report sites if the probability that there is a polymorphism
+                                        at the site is greater than N.  default: 0.0.  Note that post-
+                                        filtering is generally recommended over the use of this parameter.
+    --strict-vcf                        Generate strict VCF format (FORMAT/GQ will be an int)
+
+**Population model**::
+
+    --theta N                           The expected mutation rate or pairwise nucleotide diversity
+                                        among the population under analysis.  This serves as the
+                                        single parameter to the Ewens Sampling Formula prior model
+                                        default: 0.001
+    --ploidy N                          Sets the default ploidy for the analysis to N.  default: 2
+    --pooled-discrete                   Assume that samples result from pooled sequencing.
+                                        Model pooled samples using discrete genotypes across pools.
+                                        When using this flag, set --ploidy to the number of
+                                        alleles in each sample or use the --cnv-map to define
+                                        per-sample ploidy.
+    --pooled-continuous                 Output all alleles which pass input filters, regardless of
+                                        genotyping outcome or model.
+
+**Reference allele**::
+
+    --use-reference-allele              This flag includes the reference allele in the analysis as
+                                        if it is another sample from the same population.
+    --reference-quality MQ,BQ           Assign mapping quality of MQ to the reference allele at each
+                                        site and base quality of BQ.  default: 100,60
+
+**Allele scope**::
+
+    --use-best-n-alleles N              Evaluate only the best N SNP alleles, ranked by sum of
+                                        supporting quality scores.  (Set to 0 to use all; default: all)
+    --max-complex-gap N                 Same option as --haplotype-length N, described below.
+    --haplotype-length N                Allow haplotype calls with contiguous embedded matches of up
+                                        to this length. Set N=-1 to disable clumping. (default: 3)
+    --min-repeat-size                   When assembling observations across repeats, require the total repeat
+                                        length at least this many bp.  (default: 5)
+    --min-repeat-entropy N              To detect interrupted repeats, build across sequence until it has
+                                        entropy > N bits per bp. Set to 0 to turn off. (default: 1)
+    --no-partial-observations           Exclude observations which do not fully span the dynamically-determined
+                                        detection window.  (default, use all observations, dividing partial
+                                        support across matching haplotypes when generating haplotypes.)
+
+**Indel realignment**::
+
+    --dont-left-align-indels            Turn off left-alignment of indels, which is enabled by default.
+
+**Input filters**::
+
+    --use-duplicate-reads               Include duplicate-marked alignments in the analysis.
+                                        default: exclude duplicates marked as such in alignments
+    --min-mapping-quality Q             Exclude alignments from analysis if they have a mapping
+                                        quality less than Q.  default: 1
+    --min-base-quality Q                Exclude alleles from analysis if their supporting base
+                                        quality is less than Q.  default: 0
+    --min-supporting-allele-qsum Q      Consider any allele in which the sum of qualities of supporting
+                                        observations is at least Q.  default: 0
+    --min-supporting-mapping-qsum Q     Consider any allele in which the sum of mapping qualities of
+                                        supporting reads is at least Q.  default: 0
+    --mismatch-base-quality-threshold Q Count mismatches toward --read-mismatch-limit if the base
+                                        quality of the mismatch is >= Q.  default: 10
+    --read-mismatch-limit N             Exclude reads with more than N mismatches where each mismatch
+                                        has base quality >= mismatch-base-quality-threshold.
+                                        default: ~unbounded
+    --read-max-mismatch-fraction N      Exclude reads with more than N [0,1] fraction of mismatches where
+                                        each mismatch has base quality >= mismatch-base-quality-threshold
+                                        default: 1.0
+    --read-snp-limit N                  Exclude reads with more than N base mismatches, ignoring gaps
+                                        with quality >= mismatch-base-quality-threshold.
+                                        default: ~unbounded
+    --read-indel-limit N                Exclude reads with more than N separate gaps.
+                                        default: ~unbounded
+    --standard-filters                  Use stringent input base and mapping quality filters
+                                        Equivalent to -m 30 -q 20 -R 0 -S 0
+    --min-alternate-fraction N          Require at least this fraction of observations supporting
+                                        an alternate allele within a single individual
+                                        in order to evaluate the position.  default: 0.05
+    --min-alternate-count N             Require at least this count of observations supporting
+                                        an alternate allele within a single individual in order
+                                        to evaluate the position.  default: 2
+    --min-alternate-qsum N              Require at least this sum of quality of observations supporting
+                                        an alternate allele within a single individual in order
+                                        to evaluate the position.  default: 0
+    --min-alternate-total N             Require at least this count of observations supporting
+                                        an alternate allele within the total population in order
+                                        to use the allele in analysis.  default: 1
+    --min-coverage N                    Require at least this coverage to process a site. default: 0
+    --limit-coverage N                  Downsample per-sample coverage to this level if greater than this coverage.
+                                        default: no limit
+    --skip-coverage N                   Skip processing of alignments overlapping positions with coverage >N.
+                                        This filters sites above this coverage, but will also reduce data nearby.
+                                        default: no limit
+
+**Population priors**::
+
+    --no-population-priors              Equivalent to --pooled-discrete --hwe-priors-off and removal of
+                                        Ewens Sampling Formula component of priors.
+
+**Mappability priors**::
+
+    --hwe-priors-off                    Disable estimation of the probability of the combination
+                                        arising under HWE given the allele frequency as estimated
+                                        by observation frequency.
+    --binomial-obs-priors-off           Disable incorporation of prior expectations about observations.
+                                        Uses read placement probability, strand balance probability,
+                                        and read position (5'-3') probability.
+    --allele-balance-priors-off         Disable use of aggregate probability of observation balance between alleles
+                                        as a component of the priors.
+
+**Genotype likelihoods**::
+
+    --observation-bias FILE             Read length-dependent allele observation biases from FILE.
+                                        The format is [length] [alignment efficiency relative to reference]
+                                        where the efficiency is 1 if there is no relative observation bias.
+    --base-quality-cap Q                Limit estimated observation quality by capping base quality at Q.
+    --prob-contamination F              An estimate of contamination to use for all samples.  default: 10e-9
+    --legacy-gls                        Use legacy (polybayes equivalent) genotype likelihood calculations
+    --contamination-estimates FILE      A file containing per-sample estimates of contamination, such as
+                                        those generated by VerifyBamID.  The format should be:
+                                        sample p(read=R|genotype=AR) p(read=A|genotype=AA)
+                                        Sample '*' can be used to set default contamination estimates.
+
+**Algorithmic features**::
+
+    --report-genotype-likelihood-max    Report genotypes using the maximum-likelihood estimate provided
+                                        from genotype likelihoods.
+    --genotyping-max-iterations N       Iterate no more than N times during genotyping step. default: 1000.
+    --genotyping-max-banddepth N        Integrate no deeper than the Nth best genotype by likelihood when
+                                        genotyping. default: 6.
+    --posterior-integration-limits N,M  Integrate all genotype combinations in our posterior space
+                                        which include no more than N samples with their Mth best
+                                        data likelihood. default: 1,3.
+    --exclude-unobserved-genotypes      Skip sample genotypings for which the sample has no supporting reads.
+    --genotype-variant-threshold N      Limit posterior integration to samples where the second-best
+                                        genotype likelihood is no more than log(N) from the highest
+                                        genotype likelihood for the sample.  default: ~unbounded
+    --use-mapping-quality               Use mapping quality of alleles when calculating data likelihoods.
+    --harmonic-indel-quality            Use a weighted sum of base qualities around an indel, scaled by the
+                                        distance from the indel.  By default use a minimum BQ in flanking sequence.
+    --read-dependence-factor N          Incorporate non-independence of reads by scaling successive
+                                        observations by this factor during data likelihood
+                                        calculations.  default: 0.9
+    --genotype-qualities                Calculate the marginal probability of genotypes and report as GQ in
+                                        each sample field in the VCF output.
+
+------
+
 **Acknowledgments**
 
 The initial version of the wrapper was produced by Dan Blankenberg and upgraded by Anton Nekrutenko.
 TNG was developed by Bjoern Gruening.
+]]>
     </help>
     <expand macro="citations">
         <citation type="bibtex">