Mercurial > repos > recetox > freebayes
changeset 0:13bde05924da draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/freebayes commit 4b9af142d00f49fdc99bc635ec5ca1db032b1fc1"
author | recetox |
---|---|
date | Fri, 14 Aug 2020 13:59:45 +0000 |
parents | |
children | |
files | freebayes.xml leftalign.xml macros.xml test-data/freebayes-hxb2-test5.vcf test-data/freebayes-hxb2-test6.vcf test-data/freebayes-hxb2-test7.vcf test-data/freebayes-hxb2.bam test-data/freebayes-hxb2.fasta test-data/freebayes-phix174-test1.vcf test-data/freebayes-phix174-test2.vcf test-data/freebayes-phix174-test3.vcf test-data/freebayes-phix174-test4.vcf test-data/freebayes-phix174.bam test-data/freebayes-phix174.fasta test-data/left-align-input.bam test-data/left-align-output.bam test-data/leftalign.fa tool-data/fasta_indexes.loc.sample tool_data_table_conf.xml.sample |
diffstat | 19 files changed, 1677 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/freebayes.xml Fri Aug 14 13:59:45 2020 +0000 @@ -0,0 +1,943 @@ +<tool id="freebayes" name="FreeBayes" version="@DEPENDENCY_VERSION@"> + <description>bayesian genetic variant detector</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> + <requirement type="package" version="4.1.3">gawk</requirement> + <requirement type="package" version="20170422">parallel</requirement> + </expand> + <command detect_errors="exit_code"><![CDATA[ + ##set up input files + + #set $reference_fasta_filename = "localref.fa" + + #if str( $reference_source.reference_source_selector ) == "history": + ln -s -f '${reference_source.ref_file}' '${reference_fasta_filename}' && + samtools faidx '${reference_fasta_filename}' 2>&1 || echo "Error running samtools faidx for FreeBayes" >&2 && + #else: + #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path ) + #end if + + #if $reference_source.batchmode.processmode == 'merge': + #set $input_bamfiles = $reference_source.batchmode.input_bams + #else: + #set $input_bamfiles = [ $reference_source.batchmode.input_bams ] + #end if + + #for $bam_count, $input_bam in enumerate( $input_bamfiles ): + ln -s -f '${input_bam}' 'b_${bam_count}.bam' && + ln -s -f '${input_bam.metadata.bam_index}' 'b_${bam_count}.bam.bai' && + #end for + + ## Tabixize optional input_variant_vcf file (for --variant-input option) + #if ( str( $options_type.options_type_selector ) == 'cline' or str( $options_type.options_type_selector ) == 'full' ) and str( $options_type.optional_inputs.optional_inputs_selector ) == 'set' and str( $options_type.optional_inputs.input_variant_type.input_variant_type_selector ) == "provide_vcf": + ln -s -f '${options_type.optional_inputs.input_variant_type.input_variant_vcf}' input_variant_vcf.vcf.gz && + ln -s -f '${Tabixized_input}' input_variant_vcf.vcf.gz.tbi && + #end if + + ##if the user has specified a region or target file, just 
use that instead of calculating a set of unique regions + #if str( $target_limit_type.target_limit_type_selector ) == "limit_by_target_file": + ln -s '${target_limit_type.input_target_bed}' regions_all.bed && + #elif str( $target_limit_type.target_limit_type_selector ) == "limit_by_region": + printf '${target_limit_type.region_chromosome}\t${target_limit_type.region_start}\t${target_limit_type.region_end}' > regions_all.bed && + #else + ##divide up the regions in the bam file for efficient processing + #for $bam_count, $input_bam in enumerate( $input_bamfiles ): + samtools view -H b_${bam_count}.bam | + grep '^@SQ' | + cut -f 2- | + awk '{ gsub("^SN:","",$1); gsub("^LN:","",$2); print $1"\t0\t"$2; }' >> regions_all.bed && + #end for + #end if + + sort -u regions_all.bed > regions_uniq.bed && + ## split into even small chunks, this has some disadvantages and will not be used for the moment + ## bedtools makewindows -b regions_uniq.bed -w 10000000 -s 9990000 > regions.bed && + + mkdir vcf_output failed_alleles trace && + + ## Finished setting up inputs + + for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`; + do + echo " + + ## COMMAND LINE STARTS HERE + + freebayes + + --region '\$i' + + #for $bam_count, $input_bam in enumerate( $input_bamfiles ): + --bam 'b_${bam_count}.bam' + #end for + --fasta-reference '${reference_fasta_filename}' + + ## Outputs + --vcf './vcf_output/part_\$i.vcf' + + ## Coverage + #if str($coverage_options.coverage_options_selector) == "set": + @COVERAGE@ + #end if + + ##advanced options + #if str( $options_type.options_type_selector ) == "simple": + #pass + #elif str( $options_type.options_type_selector ) == "simple_w_filters": + --standard-filters + #elif str( $options_type.options_type_selector ) == "naive": + --haplotype-length 0 + --min-alternate-count 1 + --min-alternate-fraction 0.05 + --pooled-continuous + --report-monomorphic + #elif str( $options_type.options_type_selector ) == "naive_w_filters": + --haplotype-length 0 + 
--min-alternate-count 1 + --min-alternate-fraction 0.05 + --pooled-continuous + --report-monomorphic + --standard-filters + #elif str( $options_type.options_type_selector ) == "full": + #if str( $options_type.optional_inputs.optional_inputs_selector ) == 'set': + ${options_type.optional_inputs.report_monomorphic} + + #if $options_type.optional_inputs.output_trace_option: + --trace ./trace/part_'\$i'.txt + #end if + #if $options_type.optional_inputs.output_failed_alleles_option: + --failed-alleles ./failed_alleles/part_'\$i'.bed + #end if + #if $options_type.optional_inputs.samples: + --samples '${options_type.optional_inputs.samples}' + #end if + #if $options_type.optional_inputs.populations: + --populations '${options_type.optional_inputs.populations}' + #end if + #if $options_type.optional_inputs.A: + --cnv-map '${options_type.optional_inputs.A}' + #end if + #if str( $options_type.optional_inputs.input_variant_type.input_variant_type_selector ) == "provide_vcf": + --variant-input 'input_variant_vcf.vcf.gz' ## input_variant_vcf.vcf.gz is symlinked to a galaxy-generated dataset in "Tabixize optional input_variant_vcf file" section of the command line above + ${options_type.optional_inputs.input_variant_type.only_use_input_alleles} + #end if + #if $options_type.optional_inputs.haplotype_basis_alleles: + --haplotype-basis-alleles '${options_type.optional_inputs.haplotype_basis_alleles}' + #end if + #if $options_type.optional_inputs.observation_bias: + --observation-bias '${options_type.optional_inputs.observation_bias}' + #end if + #if $options_type.optional_inputs.contamination_estimates: + --contamination-estimates '${options_type.optional_inputs.contamination_estimates}' + #end if + #end if + + ## REPORTING + #if str( $options_type.reporting.reporting_selector ) == "set": + --pvar ${options_type.reporting.pvar} + #end if + ## POPULATION MODEL + #if str( $options_type.population_model.population_model_selector ) == "set": + --theta 
${options_type.population_model.T} + --ploidy ${options_type.population_model.P} + ${options_type.population_model.J} + ${options_type.population_model.K} + #end if + + ## REFERENCE ALLELE + #if str( $options_type.reference_allele.reference_allele_selector ) == "set": + ${options_type.reference_allele.Z} + --reference-quality '${options_type.reference_allele.reference_quality}' + #end if + + ## ALLELE SCOPE + #if str( $options_type.allele_scope.allele_scope_selector ) == "set": + ${options_type.allele_scope.I} + ${options_type.allele_scope.i} + ${options_type.allele_scope.X} + ${options_type.allele_scope.u} + ${options_type.allele_scope.no_partial_observations} + + -n ${options_type.allele_scope.n} + + --haplotype-length ${options_type.allele_scope.haplotype_length} + --min-repeat-size ${options_type.allele_scope.min_repeat_length} + --min-repeat-entropy ${options_type.allele_scope.min_repeat_entropy} + #end if + + ## REALIGNMENT + ${options_type.O} + + ##INPUT FILTERS + #if str( $options_type.input_filters.input_filters_selector ) == "set": + ${options_type.input_filters.use_duplicate_reads} + -m ${options_type.input_filters.m} + -q ${options_type.input_filters.q} + -R ${options_type.input_filters.R} + -Y ${options_type.input_filters.Y} + -e ${options_type.input_filters.e} + -F ${options_type.input_filters.F} + -C ${options_type.input_filters.C} + -G ${options_type.input_filters.G} + + #if str( $options_type.input_filters.mismatch_filters.mismatch_filters_selector ) == "set": + -Q ${options_type.input_filters.mismatch_filters.Q} + #if str($options_type.input_filters.mismatch_filters.U) + -U ${options_type.input_filters.mismatch_filters.U} + #end if + -z ${options_type.input_filters.mismatch_filters.z} + + --read-snp-limit ${options_type.input_filters.mismatch_filters.read_snp_limit} + #end if + + --min-alternate-qsum ${options_type.input_filters.min_alternate_qsum} + #end if + + ## POPULATION AND MAPPABILITY PRIORS + #if str( 
$options_type.population_mappability_priors.population_mappability_priors_selector ) == "set": + ${options_type.population_mappability_priors.k} + ${options_type.population_mappability_priors.w} + ${options_type.population_mappability_priors.V} + ${options_type.population_mappability_priors.a} + #end if + + ## GENOTYPE LIKELIHOODS + #if str( $options_type.genotype_likelihoods.genotype_likelihoods_selector ) == "set": + ${$options_type.genotype_likelihoods.experimental_gls} + + --base-quality-cap ${$options_type.genotype_likelihoods.base_quality_cap} + --prob-contamination ${$options_type.genotype_likelihoods.prob_contamination} + #end if + + ## ALGORITHMIC FEATURES + #if str( $options_type.algorithmic_features.algorithmic_features_selector ) == "set": + -B '${options_type.algorithmic_features.B}' + -W '${options_type.algorithmic_features.W}' + -D '${options_type.algorithmic_features.D}' + + #if str($options_type.algorithmic_features.genotype_variant_threshold) + -S ${options_type.algorithmic_features.genotype_variant_threshold} + #end if + + ${options_type.algorithmic_features.N} + ${options_type.algorithmic_features.j} + ${options_type.algorithmic_features.H} + ${options_type.algorithmic_features.genotype_qualities} + ${options_type.algorithmic_features.report_genotype_likelihood_max} + + --genotyping-max-banddepth ${options_type.algorithmic_features.genotyping_max_banddepth} + #end if + #end if + + "; + done > freebayes_commands.sh && + + cat freebayes_commands.sh | + parallel --will-cite -j \${GALAXY_SLOTS:-1} && + + ## make VCF header + grep "^#" "./vcf_output/part_\$i.vcf" > header.txt && + + for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`; + do + ## if this fails then it bails out the script + cat "./vcf_output/part_\$i.vcf" | grep -v "^#" || true + ; + done | sort -k1,1 -k2,2n -k5,5 -u | cat header.txt - > '${output_vcf}' + + #if str( $options_type.options_type_selector ) == "full": + #if str( 
$options_type.optional_inputs.optional_inputs_selector ) == 'set': + #if $options_type.optional_inputs.output_failed_alleles_option: + ## concatenate the per-region failed-alleles files; regions_uniq.bed is the region list the freebayes jobs were generated from + && + for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`; + do + cat "./failed_alleles/part_\$i.bed" + ; + done > '${output_failed_alleles_bed}' + #end if + + #if $options_type.optional_inputs.output_trace_option: + ## concatenate the per-region trace files; the path is double-quoted so the shell expands the loop variable + && + for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`; + do + cat "./trace/part_\$i.txt" + ; + done > '${output_trace}' + #end if + #end if + #end if + ]]></command> + + <inputs> + <conditional name="reference_source"> + <param name="reference_source_selector" type="select" label="Choose the source for the reference genome"> + <option value="cached">Locally cached</option> + <option value="history">History</option> + </param> + <when value="cached"> + <expand macro="input_bam"> + <expand macro="validation" /> + </expand> + <param name="ref_file" type="select" label="Using reference genome"> + <options from_data_table="fasta_indexes" /> + <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input dataset"/> + </param> + </when> + <when value="history"> <!-- FIX ME!!!! --> + <expand macro="input_bam" /> + <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" + help="You can upload a FASTA sequence to the history and use it as reference" /> + </when> + </conditional> + <conditional name="target_limit_type"> + <param name="target_limit_type_selector" type="select" label="Limit variant calling to a set of regions?" 
help="Sets --targets or --region options"> + <option value="do_not_limit" selected="true">Do not limit</option> + <option value="limit_by_target_file">Limit by target file</option> + <option value="limit_by_region">Limit to region</option> + </param> + <when value="do_not_limit" /> + <when value="limit_by_target_file"> + <param name="input_target_bed" argument="--targets" type="data" format="bed" label="Limit analysis to regions in this BED dataset" /> + </when> + <when value="limit_by_region"> + <param name="region_chromosome" argument="--region" type="text" label="Region Chromosome" value="" /> <!--only once? --> + <param name="region_start" type="integer" label="Region Start" value="" /> + <param name="region_end" type="integer" label="Region End" value="" /> + </when> + </conditional> + <conditional name="coverage_options"> + <param name="coverage_options_selector" type="select" label="Read coverage" + help="Sets --min-coverage, --limit-coverage, and --skip-coverage"> + <option value="do_not_set" selected="true">Use defaults</option> + <option value="set">Specify coverage options</option> + </param> + <when value="set"> + <expand macro="par_min_cov" /> + </when> + <when value="do_not_set" /> + </conditional> + <conditional name="options_type"> + <param name="options_type_selector" type="select" label="Choose parameter selection level" + help="Select how much control over the freebayes run you need"> + <option value="simple" selected="true">1. Simple diploid calling</option> + <option value="simple_w_filters">2. Simple diploid calling with filtering and coverage</option> + <option value="naive">3. Frequency-based pooled calling</option> + <option value="naive_w_filters">4. Frequency-based pooled calling with filtering and coverage</option> + <option value="full">5. 
Full list of options</option> + </param> + <when value="full"> + + <conditional name="optional_inputs"> + <param name="optional_inputs_selector" type="select" label="Additional inputs" + help="Sets --samples, --populations, --cnv-map, --trace, --failed-alleles, --variant-input, --only-use-input-alleles, --haplotype-basis-alleles, --report-all-haplotype-alleles, --report-monomorphic options, --observation-bias, and --contamination-estimates"> + <option value="do_not_set" selected="true">Do not provide additional inputs</option> + <option value="set">Provide additional inputs</option> + </param> + <when value="set"> + <param name="output_failed_alleles_option" argument="--failed-alleles" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="false" + label="Write out failed alleles file" /> + <param name="output_trace_option" argument="--trace" type="boolean" truevalue="--trace" falsevalue="" checked="false" + label="Write out algorithm trace file" /> + <param argument="--samples" type="data" format="txt" + label="Limit analysis to samples listed (one per line) in this dataset" optional="true" + help="By default FreeBayes will analyze all samples in its input BAM datasets" /> + <param argument="--populations" type="data" format="txt" optional="true" + label="Populations dataset" + help="Each line of this dataset should list a sample and a population which it is part of. The population-based bayesian inference model will then be partitioned on the basis of the populations" /> + <param name="A" argument="--cnv-map" type="data" format="bed" optional="true" + label="Read a copy number map from a BED dataset" + help="The BED dataset should have the format: 'reference sequence, start, end, sample name, copy number' for each region in each sample which does not have the default copy number as set by --ploidy. 
If not specified, copy number is set to as specified by --ploidy" /> + <conditional name="input_variant_type"> + <param name="input_variant_type_selector" type="select" label="Provide variants dataset"> + <option value="do_not_provide" selected="true">Do not provide</option> + <option value="provide_vcf">Provide VCF dataset</option> + </param> + <when value="do_not_provide" /> + <when value="provide_vcf"> + <param name="input_variant_vcf" argument="--variant-input" type="data" format="vcf_bgzip" + label="Use variants reported in this VCF dataset as input to the algorithm"> + <conversion name="Tabixized_input" type="tabix" /> + </param> + <param name="only_use_input_alleles" argument="--only-use-input-alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="false" + label="Only provide variant calls and genotype likelihoods for sites in VCF" /> + </when> + </conditional> + <param name="haplotype_basis_alleles" argument="--haplotype-basis-alleles" type="data" format="vcf" optional="true" + label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" /> + <param name="report_monomorphic" argument="--report-monomorphic" type="boolean" truevalue="--report-monomorphic" falsevalue="" checked="false" + label="Report even loci which appear to be monomorphic, and report all considered alleles, even those which are not in called genotypes" /> + <param name="observation_bias" argument="--observation-bias" type="data" format="tabular" optional="true" + label="Load read length-dependent allele observation biases from" + help="The format is [length] [alignment efficiency relative to reference] where the efficiency is 1 if there is no relative observation bias" /> + <param name="contamination_estimates" argument="--contamination-estimates" type="data" format="tabular" optional="true" + label="Upload per-sample estimates of contamination from" + help="The format should be: sample p(read=R|genotype=AR) 
p(read=A|genotype=AA) Sample '*' can be used to set default contamination estimates" /> + </when> + <when value="do_not_set" /> + </conditional> + + <!-- reporting --> + <conditional name="reporting"> + <param name="reporting_selector" type="select" label="Reporting options" help="Sets -P --pvar option"> + <option value="do_not_set" selected="true">Use defaults</option> + <option value="set">Set reporting options</option> + </param> + <when value="set"> + <param argument="--pvar" type="float" value="0.0" + label="Report sites if the probability that there is a polymorphism at the site is greater than" + help="Note that post-filtering is generally recommended over the use of this parameter" /> + </when> + <when value="do_not_set" /> + </conditional> + + <!-- population model --> + <conditional name="population_model"> + <param name="population_model_selector" type="select" label="Population model options" + help="Sets --theta, --ploidy, --pooled-discrete, and --pooled-continuous options"> + <option value="do_not_set" selected="true">Use defaults</option> + <option value="set">Set population model options</option> + </param> + <when value="set"> + <param name="T" argument="--theta" type="float" value="0.001" + label="The expected mutation rate or pairwise nucleotide diversity among the population under analysis" + help="This serves as the single parameter to the Ewens Sampling Formula prior model" /> + <param name="P" argument="--ploidy" type="integer" value="2" + label="Set ploidy for the analysis" /> + <param name="J" argument="--pooled-discrete" type="boolean" truevalue="-J" falsevalue="" checked="false" + label="Assume that samples result from pooled sequencing" + help="Model pooled samples using discrete genotypes across pools. 
When using this flag, set --ploidy to the number of alleles in each sample or use the --cnv-map to define per-sample ploidy" /> + <param name="K" argument="--pooled-continuous" type="boolean" truevalue="-K" falsevalue="" checked="false" + label="Output all alleles which pass input filters, regardless of genotyping outcome or model" /> + </when> + <when value="do_not_set" /> + </conditional> + + <!-- reference allele --> + <conditional name="reference_allele"> + <param name="reference_allele_selector" type="select" label="Reference allele options" + help="Sets --use-reference-allele and --reference-quality options"> + <option value="do_not_set" selected="true">Use defaults</option> + <option value="set">Set reference allele options</option> + </param> + <when value="set"> + <param name="Z" argument="--use-reference-allele" type="boolean" truevalue="-Z" falsevalue="" checked="false" + label="Include the reference allele in the analysis as if it is another sample from the same population" /> + <param name="reference_quality" argument="--reference-quality" type="text" value="100,60" + label="Assign mapping quality of MQ (100) to the reference allele at each site and base quality of BQ (60)" /> + </when> + <when value="do_not_set" /> + </conditional> + + <!-- allelic scope --> + <conditional name="allele_scope"> + <param name="allele_scope_selector" type="select" label="Allelic scope options" + help="Sets -I, -i, -X, -u, -n, --haplotype-length, --min-repeat-size, --min-repeat-entropy, and --no-partial-observations options"> + <option value="do_not_set" selected="true">Use defaults</option> + <option value="set">Set allelic scope options</option> + </param> + <when value="set"> + <param name="I" argument="--no-snps" type="boolean" truevalue="-I" falsevalue="" checked="false" + label="Ignore SNP alleles" /> + <param name="i" argument="--no-indels" type="boolean" truevalue="-i" falsevalue="" checked="false" + label="Ignore indel alleles" /> + <param name="X" 
argument="--no-mnps" type="boolean" truevalue="-X" falsevalue="" checked="false" + label="Ignore multi-nucleotide polymorphisms, MNPs" /> + <param name="u" argument="--no-complex" type="boolean" truevalue="-u" falsevalue="" checked="false" + label="Ignore complex events (composites of other classes)" /> + <param name="n" argument="--use-best-n-alleles" type="integer" value="0" + label="How many best SNP alleles to evaluate" + help="Alleles are ranked by the sum of supporting quality scores. Set to 0 to evaluate all" /> + <param name="haplotype_length" argument="--haplotype-length" type="integer" value="3" + label="Allow haplotype calls with contiguous embedded matches of up to (nucleotides)" /> + <param name="min_repeat_length" argument="--min-repeat-size" type="integer" value="5" + label="When assembling observations across repeats, require the total repeat length at least this many bp" /> + <param name="min_repeat_entropy" argument="--min-repeat-entropy" type="integer" value="1" + label="To detect interrupted repeats, build across sequence until it has entropy > (bits per bp)" /> + <param name="no_partial_observations" argument="--no-partial-observations" type="boolean" truevalue="--no-partial-observations" falsevalue="" checked="false" + label="Exclude observations which do not fully span the dynamically-determined detection window" + help="By default, FreeBayes uses all observations, dividing partial support across matching haplotypes when generating haplotypes" /> + </when> + <when value="do_not_set" /> + </conditional> + + <!-- indel realignment --> + <param name="O" argument="--dont-left-align-indels" type="boolean" truevalue="-O" falsevalue="" checked="false" + label="Turn off left-alignment of indels" /> + + <!-- input filters --> + <conditional name="input_filters"> + <param name="input_filters_selector" type="select" label="Input filters" + help="Sets -4, -m, -q, -R, -Y, -Q, -U, -z, -$, -e, -0, -F, -C, -3, -G, and -! 
options"> + <option value="do_not_set" selected="true">No input filters (default)</option> + <option value="set">Set input filters</option> + </param> + <when value="set"> + <param name="use_duplicate_reads" argument="--use-duplicate-reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="false" + label="Include duplicate-marked alignments in the analysis" /> + <param name="m" argument="--min-mapping-quality" type="integer" value="1" + label="Exclude alignments from analysis if they have a mapping quality less than" /> + <param name="q" argument="--min-base-quality" type="integer" value="0" + label="Exclude alleles from analysis if their supporting base quality is less than" /> + <param name="R" argument="--min-supporting-allele-qsum" type="integer" value="0" + label="Consider any allele in which the sum of qualities of supporting observations is at least" /> + <param name="Y" argument="--min-supporting-mapping-qsum" type="integer" value="0" + label="Consider any allele in which the sum of mapping qualities of supporting reads is at least" /> + <conditional name="mismatch_filters"> + <param name="mismatch_filters_selector" type="select" label="Mismatch filters" + help="Sets -Q, -U, -z, and -$ options"> + <option value="do_not_set" selected="true">No mismatch filters (default)</option> + <option value="set">Set mismatch filters</option> + </param> + <when value="set"> + <param name="Q" argument="--mismatch-base-quality-threshold" type="integer" value="10" + label="Count mismatches toward -U (option below) if the base quality of the mismatch is >=" /> + <param name="U" type="integer" argument="--read-mismatch-limit" value="1000" optional="true" + label="Exclude reads with more than N mismatches where each mismatch has base quality >= mismatch-base-quality-threshold (option above)" + help="default=~unbounded" /> + <param name="z" argument="--read-max-mismatch-fraction" type="float" value="1.0" min="0.0" max="1.0" + label="Exclude reads with more 
than N [0,1] fraction of mismatches where each mismatch has base quality >= mismatch-base-quality-threshold (second option above)" /> + <param name="read_snp_limit" argument="--read-snp-limit" type="integer" value="1000" + label="Exclude reads with more than N base mismatches, ignoring gaps with quality >= mismatch-base-quality-threshold (third option above)" + help="default=~unbounded" /> + </when> + <when value="do_not_set" /> + </conditional> + <param name="e" argument="--read-indel-limit" type="integer" value="1000" + label="Exclude reads with more than this number of separate gaps" + help="default=~unbounded" /> + <param name="standard_filters" argument="--standard-filters" type="boolean" truevalue="-0" falsevalue="" checked="false" + label="Use stringent input base and mapping quality filters" + help="Equivalent to -m 30 -q 20 -R 0 -S 0" /> + <param name="F" argument="--min-alternate-fraction" type="float" value="0.05" + label="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position" /> + <param name="C" argument="--min-alternate-count" type="integer" value="2" + label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" /> + <param name="min_alternate_qsum" argument="--min-alternate-qsum" type="integer" value="0" + label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" /> + <param name="G" argument="--min-alternate-total" type="integer" value="1" + label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" /> + </when> + <when value="do_not_set" /> + </conditional> + + <!-- population and mappability priors --> + <conditional name="population_mappability_priors"> + <param 
name="population_mappability_priors_selector" type="select" label="Population and mappability priors" + help="Sets -k, -w, -V, and -a options"> + <option value="do_not_set" selected="true">Use defaults</option> + <option value="set">Set population and mappability priors</option> + </param> + <when value="set"> + <param name="k" argument="--no-population-priors" type="boolean" truevalue="-k" falsevalue="" checked="false" + label="No population priors" + help="Equivalent to --pooled-discrete --hwe-priors-off and removal of Ewens Sampling Formula component of priors" /> + <param name="w" argument="--hwe-priors-off" type="boolean" truevalue="-w" falsevalue="" checked="false" + label="Disable estimation of the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" /> + <param name="V" argument="--binomial-obs-priors-off" type="boolean" truevalue="-V" falsevalue="" checked="false" + label="Disable incorporation of prior expectations about observations" + help="Uses read placement probability, strand balance probability, and read position (5''-3'') probability" /> + <param name="a" argument="--allele-balance-priors-off" type="boolean" truevalue="-a" falsevalue="" checked="false" + label="Disable use of aggregate probability of observation balance between alleles as a component of the priors" /> + </when> + <when value="do_not_set" /> + </conditional> + + <!-- genotype likelihoods --> + <conditional name="genotype_likelihoods"> + <param name="genotype_likelihoods_selector" type="select" label="Genotype likelihood options" + help="Sets --base-quality-cap, --experimental-gls, and --prob-contamination options"> + <option value="do_not_set" selected="true">Use defaults</option> + <option value="set">Set genotype likelihood options</option> + </param> + <when value="set"> + <param name="base_quality_cap" argument="--base-quality-cap" type="integer" value="0" + label="Limit estimated observation quality by capping base 
quality at" /> + <param name="experimental_gls" argument="--experimental-gls" type="boolean" truevalue="--experimental-gls" falsevalue="" checked="false" + label="Generate genotype likelihoods using 'effective base depth' metric qual = 1-BaseQual * 1-MapQual" + help="Incorporate partial observations. This is the default when contamination estimates are provided. Optimized for diploid samples" /> + <param name="prob_contamination" argument="--prob-contamination" type="float" value="10e-9" + label="An estimate of contamination to use for all samples" /> + </when> + <when value="do_not_set" /> + </conditional> + + <!-- algorithmic features --> + <conditional name="algorithmic_features"> + <param name="algorithmic_features_selector" type="select" label="Algorithmic features" + help="Sets --report-genotypes-likelihood-max, -B, --genotyping-max-banddepth, -W, -N, S, -j, -H, -D, -= options"> + <option value="do_not_set" selected="true">Use defaults</option> + <option value="set">Set algorithmic features</option> + </param> + <when value="set"> + <param name="report_genotype_likelihood_max" argument="--report-genotype-likelihood-max" type="boolean" truevalue="--report-genotype-likelihood-max" falsevalue="" checked="false" + label="Report genotypes using the maximum-likelihood estimate provided from genotype likelihoods" /> + <param name="B" argument="--genotyping-max-iterations" type="integer" value="1000" + label="Iterate no more than N times during genotyping step" /> + <param name="genotyping_max_banddepth" argument="--genotyping-max-banddepth" type="integer" value="6" + label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" /> + <param name="W" argument="--posterior-integration-limits" type="text" value="1,3" + label="Integrate all genotype combinations in our posterior space which include no more than N (1) samples with their Mth (3) best data likelihood" /> + <param name="N" argument="--exclude-unobserved-genotypes" type="boolean" 
truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="false" + label="Skip sample genotypings for which the sample has no supporting reads" /> + <param name="genotype_variant_threshold" argument="--genotype-variant-threshold" type="integer" value="" optional="true" + label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample" + help="default=~unbounded" /> + <param name="j" argument="--use-mapping-quality" type="boolean" truevalue="-j" falsevalue="" checked="false" + label="Use mapping quality of alleles when calculating data likelihoods" /> + <param name="H" argument="--harmonic-indel-quality" type="boolean" truevalue="-H" falsevalue="" checked="false" + label="Use a weighted sum of base qualities around an indel, scaled by the distance from the indel" + help="By default, FreeBayes uses a minimum Base Quality in flanking sequence" /> + <param name="D" argument="--read-dependence-factor" type="float" value="0.9" + label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" /> + <param name="genotype_qualities" argument="--genotype-qualities" type="boolean" truevalue="--genotype-qualities" falsevalue="" checked="false" + label="Calculate the marginal probability of genotypes and report as GQ in each sample field in the VCF output" /> + </when> + <when value="do_not_set" /> + </conditional> + </when> + <when value="simple" /> + <when value="simple_w_filters" /> + <when value="naive" /> + <when value="naive_w_filters" /> + </conditional> + </inputs> + <outputs> + <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (variants)" /> + <data format="bed" name="output_failed_alleles_bed" label="${tool.name} on ${on_string} (failed alleles)"> + <filter>( options_type['options_type_selector'] == 'cline' or options_type['options_type_selector'] == 'full' ) and 
options_type['optional_inputs']['optional_inputs_selector'] == 'set' and options_type['optional_inputs']['output_failed_alleles_option'] is True</filter> + </data> + <data format="txt" name="output_trace" label="${tool.name} on ${on_string} (trace)"> + <filter>( options_type['options_type_selector'] == 'cline' or options_type['options_type_selector'] == 'full' ) and options_type['optional_inputs']['optional_inputs_selector'] == 'set' and options_type['optional_inputs']['output_trace_option'] is True</filter> + </data> + </outputs> + <tests> + <test> + <param name="reference_source_selector" value="history" /> + <param name="processmode" value="individual" /> + <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/> + <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/> + <param name="options_type_selector" value="simple"/> + <output name="output_vcf" file="freebayes-phix174-test1.vcf" lines_diff="4" /> + </test> + <test> + <param name="reference_source_selector" value="history" /> + <param name="processmode" value="individual" /> + <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/> + <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/> + <param name="options_type_selector" value="naive_w_filters"/> + <param name="coverage_options_selector" value="set" /> + <param name="min_coverage" value="14"/> + <output name="output_vcf" file="freebayes-phix174-test2.vcf" lines_diff="4" /> + </test> + <test> + <param name="reference_source_selector" value="history" /> + <param name="processmode" value="individual" /> + <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/> + <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/> + <param name="options_type_selector" value="naive_w_filters"/> + <param name="coverage_options_selector" value="set" /> + <param name="min_coverage" value="14"/> + <output name="output_vcf" file="freebayes-phix174-test3.vcf" lines_diff="4" /> + </test> + 
<test> + <param name="reference_source_selector" value="history" /> + <param name="processmode" value="individual" /> + <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/> + <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/> + <param name="options_type_selector" value="full"/> + <param name="population_model_selector" value="set"/> + <param name="P" value="1"/> + <output name="output_vcf" file="freebayes-phix174-test4.vcf" lines_diff="4" /> + </test> + <test> + <param name="reference_source_selector" value="history" /> + <param name="processmode" value="individual" /> + <param name="ref_file" ftype="fasta" value="freebayes-hxb2.fasta"/> + <param name="input_bams" ftype="bam" value="freebayes-hxb2.bam"/> + <param name="options_type_selector" value="simple"/> + <param name="coverage_options_selector" value="set" /> + <param name="min_coverage" value="250" /> + <output name="output_vcf" file="freebayes-hxb2-test5.vcf" lines_diff="4" /> + </test> + <test> + <param name="reference_source_selector" value="history" /> + <param name="processmode" value="individual" /> + <param name="ref_file" ftype="fasta" value="freebayes-hxb2.fasta"/> + <param name="input_bams" ftype="bam" value="freebayes-hxb2.bam"/> + <param name="options_type_selector" value="simple"/> + <param name="coverage_options_selector" value="set" /> + <param name="limit_coverage" value="400" /> + <output name="output_vcf" file="freebayes-hxb2-test6.vcf" lines_diff="4" /> + </test> + <test> + <param name="reference_source_selector" value="history" /> + <param name="processmode" value="individual" /> + <param name="ref_file" ftype="fasta" value="freebayes-hxb2.fasta"/> + <param name="input_bams" ftype="bam" value="freebayes-hxb2.bam"/> + <param name="options_type_selector" value="simple"/> + <param name="coverage_options_selector" value="set" /> + <param name="skip_coverage" value="100" /> + <output name="output_vcf" file="freebayes-hxb2-test7.vcf" lines_diff="4" /> + 
</test> + </tests> + <help><![CDATA[ +**What it does** + +FreeBayes is a Bayesian genetic variant detector designed to find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms), indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and complex events (composite insertion and substitution events) smaller than the length of a short-read sequencing alignment. + +See https://github.com/ekg/freebayes for details on FreeBayes. + +------ + +**Description** + +Provided some BAM dataset(s) and a reference sequence, FreeBayes will produce a VCF dataset describing SNPs, indels, and complex variants in samples in the input alignments. + +By default, FreeBayes will consider variants supported by at least 2 observations in a single sample (-C) and also by at least 5% of the reads from a single sample (-F). These settings are suitable for low- to high-depth sequencing in haploid and diploid samples, but users working with polyploid or pooled samples may wish to adjust them depending on the characteristics of their sequencing data. + +FreeBayes is capable of calling variant haplotypes shorter than a read length where multiple polymorphisms segregate on the same read. The maximum distance between polymorphisms phased in this way is determined by the --max-complex-gap, which defaults to 3bp. In practice, this can comfortably be set to half the read length. + +Ploidy may be set to any level (-p), but by default all samples are assumed to be diploid. FreeBayes can model per-sample and per-region variation in copy-number (-A) using a copy-number variation map. + +FreeBayes can act as a frequency-based pooled caller and describe variants and haplotypes in terms of observation frequency rather than called genotypes. To do so, use --pooled-continuous and set input filters to a suitable level. Allele observation counts will be described by AO and RO fields in the VCF output.
+ +------- + +**Galaxy-specific options** + +Galaxy allows five levels of control over FreeBayes options, provided by the **Choose parameter selection level** menu option. These are: + + 1. *Simple diploid calling*: The simplest possible FreeBayes application. Equivalent to using FreeBayes with only a BAM input and no other parameter options. + 2. *Simple diploid calling with filtering and coverage*: Same as #1 plus two additional options: -0 (standard filters: --min-mapping-quality 30 --min-base-quality 20 --min-supporting-allele-qsum 0 --genotype-variant-threshold 0) and --min-coverage. + 3. *Frequency-based pooled calling*: This is equivalent to using FreeBayes with the following options: --haplotype-length 0 --min-alternate-count 1 --min-alternate-fraction 0 --pooled-continuous --report-monomorphic. This is the best choice for calling variants in mixtures such as viral, bacterial, or organellar genomes. + 4. *Frequency-based pooled calling with filtering and coverage*: Same as #3 but adds -0 and --min-coverage like in #2. + 5. *Complete list of all options*: Gives you full control by exposing all FreeBayes options as Galaxy parameters. + +------ + +**Command-line parameters** + +**Input**:: + + --bam FILE The file or set of BAM files to be analyzed. + --bam-list FILE A file containing a list of BAM files to be analyzed. + + --stdin Read BAM input on stdin. + --fasta-reference FILE Use FILE as the reference sequence for analysis. + An index file (FILE.fai) will be created if none exists. + If neither --targets nor --region are specified, FreeBayes + will analyze every position in this reference. + --targets FILE Limit analysis to targets listed in the BED-format FILE. + --region <chrom>:<start>-<end> Limit analysis to the specified region, 0-base coordinates, + end_position not included (same as BED format). + Either '-' or '..' may be used as a separator. + --samples FILE Limit analysis to samples listed (one per line) in the FILE.
+ By default FreeBayes will analyze all samples in its input + BAM files. + --populations FILE Each line of FILE should list a sample and a population which + it is part of. The population-based bayesian inference model + will then be partitioned on the basis of the populations. + --cnv-map FILE Read a copy number map from the BED file FILE, which has + either a sample-level ploidy: + sample_name copy_number + or a region-specific format: + seq_name start end sample_name copy_number + ... for each region in each sample which does not have the + default copy number as set by --ploidy. These fields can be delimited + by space or tab. + +**Output**:: + + --vcf FILE Output VCF-format results to FILE. (default: stdout) + --gvcf Write gVCF output, which indicates coverage in uncalled regions. + --gvcf-chunk NUM When writing gVCF output emit a record for every NUM bases. + --gvcf-dont-use-chunk When writing the gVCF output emit a record for all bases if + set to "true" , will also route an int to --gvcf-chunk + similar to --output-mode EMIT_ALL_SITES from GATK + --variant-input VCF Use variants reported in VCF file as input to the algorithm. + Variants in this file will be included in the output even if + there is not enough support in the data to pass input filters. + --only-use-input-alleles Only provide variant calls and genotype likelihoods for sites + and alleles which are provided in the VCF input, and provide + output in the VCF for all input alleles, not just those which + have support in the data. + --haplotype-basis-alleles VCF When specified, only variant alleles provided in this input + VCF will be used for the construction of complex or haplotype + alleles. + --report-all-haplotype-alleles At sites where genotypes are made over haplotype alleles, + provide information about all alleles in output, not only + those which are called.
+ --report-monomorphic Report even loci which appear to be monomorphic, and report all + considered alleles, even those which are not in called genotypes. + Loci which do not have any potential alternates have '.' for ALT. + --pvar N Report sites if the probability that there is a polymorphism + at the site is greater than N. default: 0.0. Note that post- + filtering is generally recommended over the use of this parameter. + --strict-vcf Generate strict VCF format (FORMAT/GQ will be an int) + +**Population model**:: + + --theta N The expected mutation rate or pairwise nucleotide diversity + among the population under analysis. This serves as the + single parameter to the Ewens Sampling Formula prior model + default: 0.001 + --ploidy N Sets the default ploidy for the analysis to N. default: 2 + --pooled-discrete Assume that samples result from pooled sequencing. + Model pooled samples using discrete genotypes across pools. + When using this flag, set --ploidy to the number of + alleles in each sample or use the --cnv-map to define + per-sample ploidy. + --pooled-continuous Output all alleles which pass input filters, regardless of + genotyping outcome or model. + +**Reference allele**:: + + --use-reference-allele This flag includes the reference allele in the analysis as + if it is another sample from the same population. + --reference-quality MQ,BQ Assign mapping quality of MQ to the reference allele at each + site and base quality of BQ. default: 100,60 + +**Allele scope**:: + + --use-best-n-alleles N Evaluate only the best N SNP alleles, ranked by sum of + supporting quality scores. (Set to 0 to use all; default: all) + --max-complex-gap + --haplotype-length N Allow haplotype calls with contiguous embedded matches of up + to this length. Set N=-1 to disable clumping. (default: 3) + --min-repeat-size When assembling observations across repeats, require the total repeat + length at least this many bp.
(default: 5) + --min-repeat-entropy N To detect interrupted repeats, build across sequence until it has + entropy > N bits per bp. Set to 0 to turn off. (default: 1) + --no-partial-observations Exclude observations which do not fully span the dynamically-determined + detection window. (default, use all observations, dividing partial + support across matching haplotypes when generating haplotypes.) + +**Indel realignment**:: + + --dont-left-align-indels Turn off left-alignment of indels, which is enabled by default. + +**Input filters**:: + + --use-duplicate-reads Include duplicate-marked alignments in the analysis. + default: exclude duplicates marked as such in alignments + --min-mapping-quality Q Exclude alignments from analysis if they have a mapping + quality less than Q. default: 1 + --min-base-quality Q Exclude alleles from analysis if their supporting base + quality is less than Q. default: 0 + --min-supporting-allele-qsum Q Consider any allele in which the sum of qualities of supporting + observations is at least Q. default: 0 + --min-supporting-mapping-qsum Q Consider any allele in which the sum of mapping qualities of + supporting reads is at least Q. default: 0 + --mismatch-base-quality-threshold Q Count mismatches toward --read-mismatch-limit if the base + quality of the mismatch is >= Q. default: 10 + --read-mismatch-limit N Exclude reads with more than N mismatches where each mismatch + has base quality >= mismatch-base-quality-threshold. + default: ~unbounded + --read-max-mismatch-fraction N Exclude reads with more than N [0,1] fraction of mismatches where + each mismatch has base quality >= mismatch-base-quality-threshold + default: 1.0 + --read-snp-limit N Exclude reads with more than N base mismatches, ignoring gaps + with quality >= mismatch-base-quality-threshold. + default: ~unbounded + --read-indel-limit N Exclude reads with more than N separate gaps.
+ default: ~unbounded + --standard-filters Use stringent input base and mapping quality filters + Equivalent to -m 30 -q 20 -R 0 -S 0 + --min-alternate-fraction N Require at least this fraction of observations supporting + an alternate allele within a single individual + in order to evaluate the position. default: 0.05 + --min-alternate-count N Require at least this count of observations supporting + an alternate allele within a single individual in order + to evaluate the position. default: 2 + --min-alternate-qsum N Require at least this sum of quality of observations supporting + an alternate allele within a single individual in order + to evaluate the position. default: 0 + --min-alternate-total N Require at least this count of observations supporting + an alternate allele within the total population in order + to use the allele in analysis. default: 1 + --min-coverage N Require at least this coverage to process a site. default: 0 + --limit-coverage N Downsample per-sample coverage to this level if greater than this coverage. + default: no limit + --skip-coverage N Skip processing of alignments overlapping positions with coverage >N. + This filters sites above this coverage, but will also reduce data nearby. + default: no limit + +**Population priors**:: + + --no-population-priors Equivalent to --pooled-discrete --hwe-priors-off and removal of + Ewens Sampling Formula component of priors. + +**Mappability priors**:: + + --hwe-priors-off Disable estimation of the probability of the combination + arising under HWE given the allele frequency as estimated + by observation frequency. + --binomial-obs-priors-off Disable incorporation of prior expectations about observations. + Uses read placement probability, strand balance probability, + and read position (5'-3') probability. + --allele-balance-priors-off Disable use of aggregate probability of observation balance between alleles + as a component of the priors.
+ +**Genotype likelihoods**:: + + --observation-bias FILE Read length-dependent allele observation biases from FILE. + The format is [length] [alignment efficiency relative to reference] + where the efficiency is 1 if there is no relative observation bias. + --base-quality-cap Q Limit estimated observation quality by capping base quality at Q. + --prob-contamination F An estimate of contamination to use for all samples. default: 10e-9 + --legacy-gls Use legacy (polybayes equivalent) genotype likelihood calculations + --contamination-estimates FILE A file containing per-sample estimates of contamination, such as + those generated by VerifyBamID. The format should be: + sample p(read=R|genotype=AR) p(read=A|genotype=AA) + Sample '*' can be used to set default contamination estimates. + +**Algorithmic features**:: + + --report-genotype-likelihood-max Report genotypes using the maximum-likelihood estimate provided + from genotype likelihoods. + --genotyping-max-iterations N Iterate no more than N times during genotyping step. default: 1000. + --genotyping-max-banddepth N Integrate no deeper than the Nth best genotype by likelihood when + genotyping. default: 6. + --posterior-integration-limits N,M Integrate all genotype combinations in our posterior space + which include no more than N samples with their Mth best + data likelihood. default: 1,3. + --exclude-unobserved-genotypes Skip sample genotypings for which the sample has no supporting reads. + --genotype-variant-threshold N Limit posterior integration to samples where the second-best + genotype likelihood is no more than log(N) from the highest + genotype likelihood for the sample. default: ~unbounded + --use-mapping-quality Use mapping quality of alleles when calculating data likelihoods. + --harmonic-indel-quality Use a weighted sum of base qualities around an indel, scaled by the + distance from the indel. By default use a minimum BQ in flanking sequence. 
+ --read-dependence-factor N Incorporate non-independence of reads by scaling successive + observations by this factor during data likelihood + calculations. default: 0.9 + --genotype-qualities Calculate the marginal probability of genotypes and report as GQ in + each sample field in the VCF output. + +------ + +**Acknowledgments** + +The initial version of the wrapper was produced by Dan Blankenberg and upgraded by Anton Nekrutenko. +TNG was developed by Bjoern Gruening. +]]> + </help> + <expand macro="citations"> + <citation type="bibtex"> + @article{Tange2011a, + title = {GNU Parallel - The Command-Line Power Tool}, + author = {O. Tange}, + address = {Frederiksberg, Denmark}, + journal = {;login: The USENIX Magazine}, + month = {Feb}, + number = {1}, + volume = {36}, + url = {http://www.gnu.org/s/parallel}, + year = {2011}, + pages = {42-47} + } + </citation> + </expand> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/leftalign.xml Fri Aug 14 13:59:45 2020 +0000
@@ -0,0 +1,70 @@
+<?xml version="1.0"?>
+<tool id="bamleftalign" name="BamLeftAlign" version="@DEPENDENCY_VERSION@">
+    <description>indels in BAM datasets</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code"><![CDATA[
+        ## Set up the reference FASTA: a history dataset is symlinked to localref.fa and indexed with samtools faidx (a failing faidx is only reported on stderr, the command chain still continues); a cached genome is read from its data-table path
+        #set $reference_fasta_filename = "localref.fa"
+        #if str( $reference_source.reference_source_selector ) == "history":
+            ln -s '${reference_source.ref_file}' '${reference_fasta_filename}' &&
+            samtools faidx '${reference_fasta_filename}' 2>&1 || echo "Error running samtools faidx for leftalign" >&2 &&
+        #else:
+            #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
+        #end if
+
+        ## Run bamleftalign: the input BAM (a parameter of the reference_source conditional, hence the qualified name) is streamed on stdin and the result is written to the output dataset
+        cat '${reference_source.input_bam}' |
+        bamleftalign
+            --fasta-reference '${reference_fasta_filename}'
+            -c
+            --max-iterations "${iterations}"
+            > '${output_bam}'
+    ]]></command>
+    <inputs>
+        <conditional name="reference_source"> <!-- both branches define input_bam and ref_file, so the command addresses them as reference_source.input_bam / reference_source.ref_file -->
+            <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
+                <option value="cached">Locally cached</option>
+                <option value="history">History</option>
+            </param>
+            <when value="cached"> <!-- built-in genome: the dbkey of the BAM must be listed in the fasta_indexes data table -->
+                <param name="input_bam" type="data" format="bam" label="Select alignment file in BAM format">
+                    <validator type="unspecified_build" />
+                    <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey"
+                        metadata_column="1" message="Sequences are not currently available for the specified build." />
+                </param>
+                <param name="ref_file" type="select" label="Using reference genome" argument="--fasta-reference">
+                    <options from_data_table="fasta_indexes"></options>
+                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+                </param>
+            </when>
+            <when value="history"> <!-- reference FASTA taken from the user's history -->
+                <param name="input_bam" type="data" format="bam" label="BAM dataset to re-align" />
+                <param name="ref_file" type="data" format="fasta" label="Using reference file" argument="--fasta-reference" />
+            </when>
+        </conditional>
+        <param name="iterations" type="integer" value="5" label="Maximum number of iterations"
+            help="Iterate the left-realignment no more than this many times" argument="--max-iterations" />
+    </inputs>
+    <outputs>
+        <data format="bam" name="output_bam" label="${tool.name} on ${on_string} (alignments)" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="reference_source_selector" value="history" />
+            <param name="ref_file" ftype="fasta" value="leftalign.fa"/>
+            <param name="input_bam" ftype="bam" value="left-align-input.bam"/>
+            <param name="iterations" value="5"/>
+            <output name="output_bam" file="left-align-output.bam" />
+        </test>
+    </tests>
+    <help>
+
+When calling indels, it is important to homogenize the positional distribution of insertions and deletions in the input by using left realignment. Left realignment will place all indels in homopolymer and microsatellite repeats at the same position, provided that doing so does not introduce mismatches between the read and reference other than the indel. This method is computationally inexpensive and handles the most common classes of alignment inconsistency.
+
+This is the bamleftalign utility from the FreeBayes package.
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Aug 14 13:59:45 2020 +0000
@@ -0,0 +1,56 @@
+<macros>
+    <token name="@DEPENDENCY_VERSION@">1.3.1</token>
+    <xml name="requirements"> <!-- freebayes and samtools packages; requirement children given inside the expand tag are inserted at the yield -->
+        <requirements>
+            <requirement type="package" version="@DEPENDENCY_VERSION@">freebayes</requirement>
+            <requirement type="package" version="1.9">samtools</requirement>
+            <yield />
+        </requirements>
+    </xml>
+    <xml name="citations"> <!-- FreeBayes citation; citation children given inside the expand tag are inserted at the yield -->
+        <citations>
+            <citation type="bibtex">
+                @misc{1207.3907,
+                    Author = {Erik Garrison},
+                    Title = {Haplotype-based variant detection from short-read sequencing},
+                    Year = {2012},
+                    Eprint = {arXiv:1207.3907},
+                    url = {http://arxiv.org/abs/1207.3907}
+                }
+            </citation>
+            <yield />
+        </citations>
+    </xml>
+    <xml name="validation"> <!-- validators for a BAM input used with a cached genome: a build must be set and be present in the fasta_indexes data table -->
+        <validator type="unspecified_build" />
+        <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." />
+    </xml>
+    <xml name="input_bam"> <!-- BAM input selector: one dataset (individual) or several (merge); children of the expand tag are placed inside each input_bams param -->
+        <conditional name="batchmode">
+            <param name="processmode" type="select" label="Run in batch mode?" help="Selecting individual mode will generate one VCF dataset for each input BAM dataset. Selecting the merge option will produce one VCF dataset for all input BAM datasets" display="radio">
+                <option value="individual" selected="true">Run individually</option>
+                <option value="merge">Merge output VCFs</option>
+            </param>
+            <when value="individual">
+                <param name="input_bams" type="data" format="bam" label="BAM dataset">
+                    <yield />
+                </param>
+            </when>
+            <when value="merge">
+                <param name="input_bams" type="data" format="bam" multiple="true" label="BAM dataset(s)">
+                    <yield />
+                </param>
+            </when>
+        </conditional>
+    </xml>
+    <token name="@COVERAGE@">
+        --min-coverage ${coverage_options.min_coverage}
+        --skip-coverage ${coverage_options.skip_coverage}
+        --limit-coverage ${coverage_options.limit_coverage}
+    </token>
+    <xml name="par_min_cov"> <!-- the three coverage parameters read by the COVERAGE token; negative values are rejected, 0 leaves the respective filter inactive -->
+        <param name="min_coverage" argument="--min-coverage" type="integer" value="0" min="0" label="Require at least this coverage to process a site" help="0 (the default) sets no minimum" />
+        <param name="limit_coverage" argument="--limit-coverage" type="integer" value="0" min="0" label="Downsample per-sample coverage to this level if greater than this coverage" help="0 (the default) applies no limit" />
+        <param name="skip_coverage" argument="--skip-coverage" type="integer" value="0" min="0" label="Skip processing of alignments overlapping positions with coverage greater than this" help="0 (the default) disables skipping" />
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/freebayes-hxb2-test5.vcf Fri Aug 14 13:59:45 2020 +0000 @@ -0,0 +1,74 @@ +##fileformat=VCFv4.2 +##fileDate=20191101 +##source=freeBayes v1.3.1-dirty +##reference=localref.fa +##contig=<ID=K03455,length=9719> +##phasing=none +##commandline="freebayes --region K03455:0..9719 --bam b_0.bam --fasta-reference localref.fa --vcf ./vcf_output/part_K03455:0..9719.vcf --min-coverage 250 --skip-coverage 0 --limit-coverage 0" +##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> +##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> +##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> +##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> +##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally"> +##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally"> +##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> +##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> +##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations"> +##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations"> 
+##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand"> +##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand"> +##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand"> +##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand"> +##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> +##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome"> +##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds 
estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele"> +##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele"> +##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without."> +##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best."> +##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout."> +##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> +##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. 
Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."> +##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position."> +##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."> +##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length"> +##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles"> +##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles"> +##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments"> +##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments"> +##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record."> +##INFO=<ID=technology.ILLUMINA,Number=A,Type=Float,Description="Fraction of observations supporting the alternate observed in reads from ILLUMINA"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"> +##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele"> 
+##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> +##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> +##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SRR8525881 +K03455 2669 . G A 6485.33 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=250;CIGAR=1X;DP=250;DPB=250;DPRA=0;EPP=371.605;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=351.179;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=9021;QR=0;RO=0;RPL=0;RPP=545.878;RPPR=0;RPR=250;RUN=1;SAF=228;SAP=371.605;SAR=22;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:250:0,250:0:0:250:9021:-811.724,-75.2575,0 +K03455 2720 . T C 10370.9 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=380;CIGAR=1X;DP=384;DPB=384;DPRA=0;EPP=164.294;EPPR=0;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=531.397;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=12594;QR=0;RO=0;RPL=82;RPP=269.621;RPPR=0;RPR=298;RUN=1;SAF=326;SAP=425.785;SAR=54;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:384:0,380:0:0:380:12594:-1132.93,-114.391,0 +K03455 2728 . T A 4.41787e-14 . AB=0;ABP=0;AC=0;AF=0;AN=2;AO=25;CIGAR=1X;DP=459;DPB=459;DPRA=0;EPP=28.1125;EPPR=3.87378;GTI=0;LEN=1;MEANALT=3;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=475.527;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=444;QR=13384;RO=425;RPL=21;RPP=28.1125;RPPR=38.2086;RPR=4;RUN=1;SAF=25;SAP=57.2971;SAR=0;SRF=356;SRP=423.862;SRR=69;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 0/0:459:425,25:425:13384:25:444:0,-95.5268,-1164.2 +K03455 2797 . G A 20542.6 . 
AB=0;ABP=0;AC=2;AF=1;AN=2;AO=666;CIGAR=1X;DP=666;DPB=666;DPRA=0;EPP=133.429;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=927.877;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=23333;QR=0;RO=0;RPL=319;RPP=5.56651;RPPR=0;RPR=347;RUN=1;SAF=474;SAP=262.296;SAR=192;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:666:0,666:0:0:666:23333:-2098.99,-200.486,0 +K03455 2802 . C A 3.80909e-14 . AB=0;ABP=0;AC=0;AF=0;AN=2;AO=57;CIGAR=1X;DP=672;DPB=672;DPRA=0;EPP=126.784;EPPR=65.6713;GTI=0;LEN=1;MEANALT=3;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=627.193;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=1084;QR=20184;RO=613;RPL=56;RPP=118.251;RPPR=63.8009;RPR=1;RUN=1;SAF=56;SAP=118.251;SAR=1;SRF=403;SRP=134.96;SRR=210;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 0/0:672:613,57:613:20184:57:1084:0,-104.146,-1718.1 +K03455 2848 . TAAAAAAGAAAAAATC TAAAAAAAGAAAAAATC 18102 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=630;CIGAR=1M1I15M;DP=722;DPB=774.875;DPRA=0;EPP=43.2136;EPPR=13.8677;GTI=0;LEN=1;MEANALT=40;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=830.486;PAIRED=1;PAIREDR=1;PAO=22;PQA=531.5;PQR=553.5;PRO=23;QA=20369;QR=158;RO=5;RPL=282;RPP=18.0245;RPPR=3.44459;RPR=348;RUN=1;SAF=333;SAP=7.47733;SAR=297;SRF=3;SRP=3.44459;SRR=2;TYPE=ins;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:722:5,630:5:158:630:20369:-1818.16,-176.934,0 +K03455 2873 . AC AT 21993.4 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=754;CIGAR=1M1X;DP=773;DPB=775.5;DPRA=0;EPP=5.26817;EPPR=5.18177;GTI=0;LEN=1;MEANALT=5;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=1039.03;PAIRED=1;PAIREDR=1;PAO=2.5;PQA=63;PQR=63;PRO=2.5;QA=24534;QR=37;RO=1;RPL=357;RPP=7.6182;RPPR=5.18177;RPR=397;RUN=1;SAF=378;SAP=3.02182;SAR=376;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:773:1,754:1:37:754:24534:-2203.73,-223.941,0 +K03455 2882 . G T 21924 . 
AB=0;ABP=0;AC=2;AF=1;AN=2;AO=721;CIGAR=1X;DP=736;DPB=736;DPRA=0;EPP=36.2149;EPPR=22.5536;GTI=0;LEN=1;MEANALT=3;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=945.018;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=24583;QR=177;RO=9;RPL=353;RPP=3.68794;RPPR=9.04217;RPR=368;RUN=1;SAF=363;SAP=3.08559;SAR=358;SRF=7;SRP=9.04217;SRR=2;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:736:9,721:9:177:721:24583:-2195.37,-203.838,0 +K03455 2883 . GGT GGG 2.65192e-14 . AB=0;ABP=0;AC=0;AF=0;AN=2;AO=46;CIGAR=2M1X;DP=726;DPB=731;DPRA=0;EPP=86.2815;EPPR=8.74455;GTI=0;LEN=1;MEANALT=6;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=660.068;PAIRED=1;PAIREDR=1;PAO=3.5;PQA=112;PQR=112;PRO=4.5;QA=1167;QR=22483;RO=668;RPL=43;RPP=78.5398;RPPR=3.12733;RPR=3;RUN=1;SAF=45;SAP=94.401;SAR=1;SRF=316;SRP=7.22322;SRR=352;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 0/0:726:668,46:668:22483:46:1167:0,-109.931,-1917.55 +K03455 2894 . T C 21733.9 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=717;CIGAR=1X;DP=723;DPB=723;DPRA=0;EPP=10.8876;EPPR=5.18177;GTI=0;LEN=1;MEANALT=3;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=991.951;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=24231;QR=17;RO=1;RPL=373;RPP=5.55731;RPPR=5.18177;RPR=344;RUN=1;SAF=354;SAP=3.25561;SAR=363;SRF=1;SRP=5.18177;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:723:1,717:1:17:717:24231:-2178.13,-214.613,0 +K03455 2906 . C T 20694.7 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=650;CIGAR=1X;DP=653;DPB=653;DPRA=0;EPP=3.86553;EPPR=3.0103;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=885.229;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=23147;QR=65;RO=2;RPL=368;RPP=27.7183;RPPR=7.35324;RPR=282;RUN=1;SAF=301;SAP=10.7073;SAR=349;SRF=1;SRP=3.0103;SRR=1;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:653:2,650:2:65:650:23147:-2076.11,-190.301,0 +K03455 2913 . G A 20816.6 . 
AB=0;ABP=0;AC=2;AF=1;AN=2;AO=638;CIGAR=1X;DP=639;DPB=639;DPRA=0;EPP=3.0103;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=878.93;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=23329;QR=31;RO=1;RPL=402;RPP=96.7988;RPPR=5.18177;RPR=236;RUN=1;SAF=297;SAP=9.5996;SAR=341;SRF=1;SRP=5.18177;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:639:1,638:1:31:638:23329:-2095.39,-189.259,0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/freebayes-hxb2-test6.vcf Fri Aug 14 13:59:45 2020 +0000 @@ -0,0 +1,83 @@ +##fileformat=VCFv4.2 +##fileDate=20191106 +##source=freeBayes v1.3.1-dirty +##reference=localref.fa +##contig=<ID=K03455,length=9719> +##phasing=none +##commandline="freebayes --region K03455:0..9719 --bam b_0.bam --fasta-reference localref.fa --vcf ./vcf_output/part_K03455:0..9719.vcf --min-coverage 0 --skip-coverage 0 --limit-coverage 400" +##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> +##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> +##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> +##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> +##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally"> +##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally"> +##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> +##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> +##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations"> +##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations"> 
+##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand"> +##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand"> +##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand"> +##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand"> +##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> +##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome"> +##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds 
estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele"> +##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele"> +##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without."> +##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best."> +##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout."> +##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> +##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. 
Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."> +##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position."> +##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."> +##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length"> +##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles"> +##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles"> +##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments"> +##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments"> +##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record."> +##INFO=<ID=technology.ILLUMINA,Number=A,Type=Float,Description="Fraction of observations supporting the alternate observed in reads from ILLUMINA"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"> +##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele"> 
+##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> +##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> +##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SRR8525881 +K03455 2652 . G A 4664.77 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=200;CIGAR=1X;DP=200;DPB=200;DPRA=0;EPP=316.788;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=281.864;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=6697;QR=0;RO=0;RPL=0;RPP=437.305;RPPR=0;RPR=200;RUN=1;SAF=185;SAP=316.788;SAR=15;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:200:0,200:0:0:200:6697:-602.765,-60.206,0 +K03455 2660 . TTGTA CTGTG 5199.64 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=207;CIGAR=1X3M1X;DP=213;DPB=218;DPRA=0;EPP=324.273;EPPR=0;GTI=0;LEN=5;MEANALT=5;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=291.568;PAIRED=1;PAIREDR=0;PAO=25;PQA=744;PQR=0;PRO=0;QA=7336;QR=0;RO=0;RPL=0;RPP=452.505;RPPR=0;RPR=207;RUN=1;SAF=191;SAP=324.273;SAR=16;SRF=0;SRP=0;SRR=0;TYPE=complex;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:213:0,207:0:0:207:7336:-659.951,-62.3132,0 +K03455 2669 . G A 6485.33 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=250;CIGAR=1X;DP=250;DPB=250;DPRA=0;EPP=371.605;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=351.179;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=9021;QR=0;RO=0;RPL=0;RPP=545.878;RPPR=0;RPR=250;RUN=1;SAF=228;SAP=371.605;SAR=22;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:250:0,250:0:0:250:9021:-811.724,-75.2575,0 +K03455 2720 . T C 10370.9 . 
AB=0;ABP=0;AC=2;AF=1;AN=2;AO=380;CIGAR=1X;DP=384;DPB=384;DPRA=0;EPP=164.294;EPPR=0;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=531.397;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=12594;QR=0;RO=0;RPL=82;RPP=269.621;RPPR=0;RPR=298;RUN=1;SAF=326;SAP=425.785;SAR=54;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:384:0,380:0:0:380:12594:-1132.93,-114.391,0 +K03455 2728 . T A 4.41787e-14 . AB=0;ABP=0;AC=0;AF=0;AN=2;AO=25;CIGAR=1X;DP=459;DPB=459;DPRA=0;EPP=28.1125;EPPR=3.87378;GTI=0;LEN=1;MEANALT=3;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=475.527;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=444;QR=13384;RO=425;RPL=21;RPP=28.1125;RPPR=38.2086;RPR=4;RUN=1;SAF=25;SAP=57.2971;SAR=0;SRF=356;SRP=423.862;SRR=69;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 0/0:459:425,25:425:13384:25:444:0,-95.5268,-1164.2 +K03455 2797 . G A 20542.6 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=666;CIGAR=1X;DP=666;DPB=666;DPRA=0;EPP=133.429;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=927.877;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=23333;QR=0;RO=0;RPL=319;RPP=5.56651;RPPR=0;RPR=347;RUN=1;SAF=474;SAP=262.296;SAR=192;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:666:0,666:0:0:666:23333:-2098.99,-200.486,0 +K03455 2802 . C A 3.80909e-14 . AB=0;ABP=0;AC=0;AF=0;AN=2;AO=57;CIGAR=1X;DP=672;DPB=672;DPRA=0;EPP=126.784;EPPR=65.6713;GTI=0;LEN=1;MEANALT=3;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=627.193;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=1084;QR=20184;RO=613;RPL=56;RPP=118.251;RPPR=63.8009;RPR=1;RUN=1;SAF=56;SAP=118.251;SAR=1;SRF=403;SRP=134.96;SRR=210;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 0/0:672:613,57:613:20184:57:1084:0,-104.146,-1718.1 +K03455 2848 . TAAAAAAGAAAAAATC TAAAAAAAGAAAAAATC 18102 . 
AB=0;ABP=0;AC=2;AF=1;AN=2;AO=630;CIGAR=1M1I15M;DP=722;DPB=774.875;DPRA=0;EPP=43.2136;EPPR=13.8677;GTI=0;LEN=1;MEANALT=40;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=830.486;PAIRED=1;PAIREDR=1;PAO=22;PQA=531.5;PQR=553.5;PRO=23;QA=20369;QR=158;RO=5;RPL=282;RPP=18.0245;RPPR=3.44459;RPR=348;RUN=1;SAF=333;SAP=7.47733;SAR=297;SRF=3;SRP=3.44459;SRR=2;TYPE=ins;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:722:5,630:5:158:630:20369:-1818.16,-176.934,0 +K03455 2873 . AC AT 21993.4 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=754;CIGAR=1M1X;DP=773;DPB=775.5;DPRA=0;EPP=5.26817;EPPR=5.18177;GTI=0;LEN=1;MEANALT=5;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=1039.03;PAIRED=1;PAIREDR=1;PAO=2.5;PQA=63;PQR=63;PRO=2.5;QA=24534;QR=37;RO=1;RPL=357;RPP=7.6182;RPPR=5.18177;RPR=397;RUN=1;SAF=378;SAP=3.02182;SAR=376;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:773:1,754:1:37:754:24534:-2203.73,-223.941,0 +K03455 2882 . G T 21924 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=721;CIGAR=1X;DP=736;DPB=736;DPRA=0;EPP=36.2149;EPPR=22.5536;GTI=0;LEN=1;MEANALT=3;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=945.018;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=24583;QR=177;RO=9;RPL=353;RPP=3.68794;RPPR=9.04217;RPR=368;RUN=1;SAF=363;SAP=3.08559;SAR=358;SRF=7;SRP=9.04217;SRR=2;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:736:9,721:9:177:721:24583:-2195.37,-203.838,0 +K03455 2883 . GGT GGG 2.65192e-14 . AB=0;ABP=0;AC=0;AF=0;AN=2;AO=46;CIGAR=2M1X;DP=726;DPB=731;DPRA=0;EPP=86.2815;EPPR=8.74455;GTI=0;LEN=1;MEANALT=6;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=660.068;PAIRED=1;PAIREDR=1;PAO=3.5;PQA=112;PQR=112;PRO=4.5;QA=1167;QR=22483;RO=668;RPL=43;RPP=78.5398;RPPR=3.12733;RPR=3;RUN=1;SAF=45;SAP=94.401;SAR=1;SRF=316;SRP=7.22322;SRR=352;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 0/0:726:668,46:668:22483:46:1167:0,-109.931,-1917.55 +K03455 2894 . T C 21733.9 . 
AB=0;ABP=0;AC=2;AF=1;AN=2;AO=717;CIGAR=1X;DP=723;DPB=723;DPRA=0;EPP=10.8876;EPPR=5.18177;GTI=0;LEN=1;MEANALT=3;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=991.951;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=24231;QR=17;RO=1;RPL=373;RPP=5.55731;RPPR=5.18177;RPR=344;RUN=1;SAF=354;SAP=3.25561;SAR=363;SRF=1;SRP=5.18177;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:723:1,717:1:17:717:24231:-2178.13,-214.613,0 +K03455 2906 . C T 20694.7 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=650;CIGAR=1X;DP=653;DPB=653;DPRA=0;EPP=3.86553;EPPR=3.0103;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=885.229;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=23147;QR=65;RO=2;RPL=368;RPP=27.7183;RPPR=7.35324;RPR=282;RUN=1;SAF=301;SAP=10.7073;SAR=349;SRF=1;SRP=3.0103;SRR=1;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:653:2,650:2:65:650:23147:-2076.11,-190.301,0 +K03455 2913 . G A 20816.6 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=638;CIGAR=1X;DP=639;DPB=639;DPRA=0;EPP=3.0103;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=878.93;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=23329;QR=31;RO=1;RPL=402;RPP=96.7988;RPPR=5.18177;RPR=236;RUN=1;SAF=297;SAP=9.5996;SAR=341;SRF=1;SRP=5.18177;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:639:1,638:1:31:638:23329:-2095.39,-189.259,0 +K03455 2987 . C T 6415.71 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=234;CIGAR=1X;DP=235;DPB=235;DPRA=0;EPP=16.4103;EPPR=0;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=328.998;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=7972;QR=0;RO=0;RPL=234;RPP=511.135;RPPR=0;RPR=0;RUN=1;SAF=98;SAP=16.4103;SAR=136;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:235:0,234:0:0:234:7972:-717.259,-70.441,0 +K03455 2992 . T G 2.14568e-14 . 
AB=0;ABP=0;AC=0;AF=0;AN=2;AO=11;CIGAR=1X;DP=202;DPB=202;DPRA=0;EPP=26.8965;EPPR=8.54184;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=220.335;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=218;QR=6192;RO=190;RPL=11;RPP=26.8965;RPPR=415.59;RPR=0;RUN=1;SAF=11;SAP=26.8965;SAR=0;SRF=84;SRP=8.54184;SRR=106;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 0/0:202:190,11:190:6192:11:218:0,-40.8623,-537.516 +K03455 3016 . CACCA CACCG 2027.61 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=81;CIGAR=4M1X;DP=92;DPB=92.4;DPRA=0;EPP=25.5561;EPPR=0;GTI=0;LEN=1;MEANALT=6;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=86.0691;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=2598;QR=0;RO=0;RPL=81;RPP=178.9;RPPR=0;RPR=0;RUN=1;SAF=55;SAP=25.5561;SAR=26;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1/1:92:0,81:0:0:81:2598:-233.803,-24.3834,0 +K03455 3025 . T A 0 . AB=0;ABP=0;AC=0;AF=0;AN=2;AO=3;CIGAR=1X;DP=51;DPB=51;DPRA=0;EPP=9.52472;EPPR=4.6389;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=61.9298;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=45;QR=1423;RO=48;RPL=3;RPP=9.52472;RPPR=107.241;RPR=0;RUN=1;SAF=3;SAP=9.52472;SAR=0;SRF=27;SRP=4.6389;SRR=21;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 0/0:51:48,3:48:1423:3:45:0,-11.1526,-124.109 +K03455 3035 . T A 2.21948e-14 . AB=0;ABP=0;AC=0;AF=0;AN=2;AO=3;CIGAR=1X;DP=37;DPB=37;DPRA=0;EPP=9.52472;EPPR=4.65535;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=40.669;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=54;QR=954;RO=33;RPL=3;RPP=9.52472;RPPR=74.6689;RPR=0;RUN=1;SAF=3;SAP=9.52472;SAR=0;SRF=19;SRP=4.65535;SRR=14;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 0/0:37:33,3:33:954:3:54:0,-5.96965,-81.0818 +K03455 3036 . A G 1.09205e-09 . 
AB=0;ABP=0;AC=0;AF=0;AN=2;AO=3;CIGAR=1X;DP=24;DPB=24;DPRA=0;EPP=9.52472;EPPR=26.2761;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=22.1038;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=67;QR=578;RO=21;RPL=3;RPP=9.52472;RPPR=48.6112;RPR=0;RUN=1;SAF=3;SAP=9.52472;SAR=0;SRF=18;SRP=26.2761;SRR=3;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 0/0:24:21,3:21:578:3:67:0,-0.971771,-46.0254 +K03455 3042 . A C 7.93379e-05 . AB=0.25;ABP=9.52472;AC=1;AF=0.5;AN=2;AO=3;CIGAR=1X;DP=12;DPB=12;DPRA=0;EPP=9.52472;EPPR=14.8328;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=10.9103;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=52;QR=233;RO=9;RPL=3;RPP=9.52472;RPPR=22.5536;RPR=0;RUN=1;SAF=3;SAP=9.52472;SAR=0;SRF=8;SRP=14.8328;SRR=1;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 0/1:12:9,3:9:233:3:52:-1.24091,0,-17.6087
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/freebayes-hxb2-test7.vcf Fri Aug 14 13:59:45 2020 +0000 @@ -0,0 +1,62 @@ +##fileformat=VCFv4.2 +##fileDate=20191030 +##source=freeBayes v1.3.1-dirty +##reference=localref.fa +##contig=<ID=K03455,length=9719> +##phasing=none +##commandline="freebayes --region K03455:0..9719 --bam b_0.bam --fasta-reference localref.fa --vcf ./vcf_output/part_K03455:0..9719.vcf --min-coverage 0 --skip-coverage 100 --limit-coverage 0" +##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> +##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> +##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> +##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> +##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally"> +##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally"> +##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> +##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> +##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations"> +##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations"> 
+##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand"> +##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand"> +##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand"> +##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand"> +##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> +##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome"> +##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds 
estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele"> +##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele"> +##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without."> +##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best."> +##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout."> +##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> +##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. 
Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."> +##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position."> +##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."> +##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length"> +##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles"> +##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles"> +##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments"> +##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments"> +##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record."> +##INFO=<ID=technology.ILLUMINA,Number=A,Type=Float,Description="Fraction of observations supporting the alternate observed in reads from ILLUMINA"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"> +##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele"> 
+##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> +##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> +##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SRR8525881
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/freebayes-hxb2.fasta Fri Aug 14 13:59:45 2020 +0000 @@ -0,0 +1,2 @@ +>K03455 +TGGAAGGGCTAATTCACTCCCAACGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTAGCAGAACTACACACCAGGGCCAGGGATCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGAGAAGTTAGAAGAAGCCAACAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGAATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGACATCGAGCTTGCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATCCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCC
ATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAG
AAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACTAGAGCCCT
GGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAAGTAGTACATGTAACGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTC
TGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAAGATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTAGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGACATCGAGCTTGCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATCCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/freebayes-phix174-test1.vcf Fri Aug 14 13:59:45 2020 +0000 @@ -0,0 +1,93 @@ +##fileformat=VCFv4.2 +##fileDate=20191030 +##source=freeBayes v1.3.1-dirty +##reference=localref.fa +##contig=<ID=phiX174,length=5386> +##phasing=none +##commandline="freebayes --region phiX174:0..5386 --bam b_0.bam --fasta-reference localref.fa --vcf ./vcf_output/part_phiX174:0..5386.vcf" +##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> +##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> +##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> +##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> +##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally"> +##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally"> +##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> +##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> +##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations"> +##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations"> +##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations 
on the forward strand"> +##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand"> +##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand"> +##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand"> +##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> +##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome"> +##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given 
E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele"> +##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele"> +##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without."> +##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best."> +##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout."> +##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> +##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. 
Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."> +##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position."> +##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."> +##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length"> +##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles"> +##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles"> +##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments"> +##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments"> +##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record."> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"> +##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele"> +##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> 
+##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> +##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT unknown +phiX174 311 . A G 0.00392026 . AB=0.222222;ABP=9.04217;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=9;DPB=9;DPRA=0;EPP=7.35324;EPPR=5.80219;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=33.5714;NS=1;NUMALT=1;ODDS=7.0097;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=70;QR=478;RO=7;RPL=0;RPP=7.35324;RPPR=3.32051;RPR=2;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=5;SRP=5.80219;SRR=2;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:9:7,2:7:478:2:70:-1.91487,0,-18.7749 +phiX174 374 . T G 23.3387 . AB=0.5;ABP=3.0103;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=4;DPB=4;DPRA=0;EPP=7.35324;EPPR=7.35324;GTI=0;LEN=1;MEANALT=1;MQM=37;MQMR=25;NS=1;NUMALT=1;ODDS=1.58025;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=75;QR=141;RO=2;RPL=1;RPP=3.0103;RPPR=7.35324;RPR=1;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=0;SRP=7.35324;SRR=2;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:4:2,2:2:141:2:75:-5.19433,0,-3.54586 +phiX174 913 . A C 2.27318 . AB=0.4;ABP=3.44459;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=5;DPB=5;DPRA=0;EPP=3.0103;EPPR=3.73412;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=29;NS=1;NUMALT=1;ODDS=0.380296;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=67;QR=171;RO=3;RPL=1;RPP=3.0103;RPPR=3.73412;RPR=1;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=3;SRP=9.52472;SRR=0;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:5:3,2:3:171:2:67:-3.54411,0,-6.47921 +phiX174 1205 . A C 0.00388031 . 
AB=0.2;ABP=10.8276;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=10;DPB=10;DPRA=0;EPP=7.35324;EPPR=7.35324;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=34;NS=1;NUMALT=1;ODDS=7.01995;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=67;QR=456;RO=8;RPL=2;RPP=7.35324;RPPR=7.35324;RPR=0;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=2;SRP=7.35324;SRR=6;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:10:8,2:8:456:2:67:-2.11454,0,-21.3383 +phiX174 1245 . G T 0.0324554 . AB=0.222222;ABP=9.04217;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=9;DPB=9;DPRA=0;EPP=7.35324;EPPR=3.32051;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=28.4286;NS=1;NUMALT=1;ODDS=4.8927;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=70;QR=389;RO=7;RPL=2;RPP=7.35324;RPPR=3.32051;RPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=18.2106;SRR=7;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:9:7,2:7:389:2:70:-2.83427,0,-14.5618 +phiX174 1249 . T G 0.0166698 . AB=0.222222;ABP=9.04217;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=9;DPB=9;DPRA=0;EPP=3.0103;EPPR=3.32051;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=30.1429;NS=1;NUMALT=1;ODDS=5.56079;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=74;QR=464;RO=7;RPL=1;RPP=3.0103;RPPR=5.80219;RPR=1;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=2;SRP=5.80219;SRR=5;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:9:7,2:7:464:2:74:-1.94207,0,-16.5662 +phiX174 1445 . C A 0.147157 . AB=0.285714;ABP=5.80219;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=7;DPB=7;DPRA=0;EPP=7.35324;EPPR=6.91895;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=32.2;NS=1;NUMALT=1;ODDS=3.36782;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=76;QR=273;RO=5;RPL=1;RPP=3.0103;RPPR=6.91895;RPR=1;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=2;SRP=3.44459;SRR=3;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:7:5,2:5:273:2:76:-2.52649,0,-12.4911 +phiX174 1577 . A C 0.0123232 . 
AB=0.222222;ABP=9.04217;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=9;DPB=9;DPRA=0;EPP=7.35324;EPPR=3.32051;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=35.2857;NS=1;NUMALT=1;ODDS=5.8634;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=60;QR=460;RO=7;RPL=1;RPP=3.0103;RPPR=10.7656;RPR=1;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=4;SRP=3.32051;SRR=3;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:9:7,2:7:460:2:60:-1.81064,0,-19.8257 +phiX174 1631 . T G 0.00100612 . AB=0.2;ABP=10.8276;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=10;DPB=10;DPRA=0;EPP=7.35324;EPPR=4.09604;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=28;NS=1;NUMALT=1;ODDS=8.3701;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=68;QR=500;RO=8;RPL=0;RPP=7.35324;RPPR=3.0103;RPR=2;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=3;SRP=4.09604;SRR=5;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:10:8,2:8:500:2:68:-1.52818,0,-17.3788 +phiX174 1665 . C A 0.0164128 . AB=0.166667;ABP=14.5915;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=12;DPB=12;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=2;MQM=37;MQMR=30.3333;NS=1;NUMALT=1;ODDS=5.57635;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=65;QR=587;RO=9;RPL=1;RPP=3.0103;RPPR=5.18177;RPR=1;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=4;SRP=3.25157;SRR=5;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:12:9,2:9:587:2:65:-2.35331,0,-21.269 +phiX174 1772 . T G 0.0180574 . AB=0.2;ABP=10.8276;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=10;DPB=10;DPRA=0;EPP=3.0103;EPPR=3.32051;GTI=0;LEN=1;MEANALT=2;MQM=31;MQMR=31.8571;NS=1;NUMALT=1;ODDS=5.48067;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=59;QR=425;RO=7;RPL=1;RPP=3.0103;RPPR=3.32051;RPR=1;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=1;SRP=10.7656;SRR=6;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:10:7,2:7:425:2:59:-1.97686,0,-17.3816 +phiX174 1786 . T G 7.94817e-05 . 
AB=0.166667;ABP=14.5915;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=12;DPB=12;DPRA=0;EPP=7.35324;EPPR=3.87889;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=33.4;NS=1;NUMALT=1;ODDS=10.9085;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=59;QR=537;RO=10;RPL=0;RPP=7.35324;RPPR=6.48466;RPR=2;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=1;SRP=16.9077;SRR=9;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:12:10,2:10:537:2:59:-0.861483,0,-25.1364 +phiX174 1945 . T G 1.01422 . AB=0.333333;ABP=4.45795;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=6;DPB=6;DPRA=0;EPP=3.0103;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=37;NS=1;NUMALT=1;ODDS=1.3354;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=59;QR=263;RO=4;RPL=1;RPP=3.0103;RPPR=5.18177;RPR=1;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=2;SRP=3.0103;SRR=2;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:6:4,2:4:263:2:59:-3.25425,0,-11.8637 +phiX174 2230 . T G 0.00580188 . AB=0.2;ABP=10.8276;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=10;DPB=10;DPRA=0;EPP=7.35324;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=35.5;NS=1;NUMALT=1;ODDS=6.61746;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=80;QR=491;RO=8;RPL=0;RPP=7.35324;RPPR=4.09604;RPR=2;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=3;SRP=4.09604;SRR=5;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:10:8,2:8:491:2:80:-2.28934,0,-22.3591 +phiX174 2699 . C A 2.04855 . AB=0.5;ABP=3.0103;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=4;DPB=4;DPRA=0;EPP=7.35324;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=37;NS=1;NUMALT=1;ODDS=0.517362;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=76;QR=109;RO=2;RPL=1;RPP=3.0103;RPPR=3.0103;RPR=1;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=0;SRP=7.35324;SRR=2;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:4:2,2:2:109:2:76:-3.4054,0,-5.58455 +phiX174 2722 . T G 1.01772 . 
AB=0.4;ABP=3.44459;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=5;DPB=5;DPRA=0;EPP=7.35324;EPPR=3.73412;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=33;NS=1;NUMALT=1;ODDS=1.33197;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=67;QR=150;RO=3;RPL=1;RPP=3.0103;RPPR=3.73412;RPR=1;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=0;SRP=9.52472;SRR=3;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:5:3,2:3:150:2:67:-3.1308,0,-7.62253 +phiX174 2814 . T G 0.670623 . AB=0.4;ABP=3.44459;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=5;DPB=5;DPRA=0;EPP=3.0103;EPPR=3.73412;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=33;NS=1;NUMALT=1;ODDS=1.79322;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=57;QR=135;RO=3;RPL=1;RPP=3.0103;RPPR=3.73412;RPR=1;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=0;SRP=9.52472;SRR=3;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:5:3,2:3:135:2:57:-2.93048,0,-6.73807 +phiX174 2828 . T G 0.160629 . AB=0.25;ABP=7.35324;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=8;DPB=8;DPRA=0;EPP=7.35324;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=31;NS=1;NUMALT=1;ODDS=3.27866;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=100;QR=348;RO=6;RPL=0;RPP=7.35324;RPPR=4.45795;RPR=2;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=1;SRP=8.80089;SRR=5;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:8:6,2:6:348:2:100:-3.34336,0,-13.6819 +phiX174 2983 . T G 1.55467 . AB=0.4;ABP=3.44459;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=5;DPB=5;DPRA=0;EPP=3.0103;EPPR=3.73412;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=33;NS=1;NUMALT=1;ODDS=0.843116;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=57;QR=150;RO=3;RPL=1;RPP=3.0103;RPPR=3.73412;RPR=1;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=2;SRP=3.73412;SRR=1;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:5:3,2:3:150:2:57:-3.34311,0,-7.61298 +phiX174 3110 . T C 1.16857 . 
AB=0.333333;ABP=5.18177;AC=1;AF=0.5;AN=2;AO=3;CIGAR=1X;DP=9;DPB=9;DPRA=0;EPP=3.73412;EPPR=4.45795;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=35;NS=1;NUMALT=1;ODDS=1.17522;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=94;QR=361;RO=6;RPL=0;RPP=9.52472;RPPR=3.0103;RPR=3;RUN=1;SAF=1;SAP=3.73412;SAR=2;SRF=1;SRP=8.80089;SRR=5;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:9:6,3:6:361:3:94:-4.02963,0,-16.4039 +phiX174 3155 . T G 5.42491 . AB=0.285714;ABP=5.80219;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=7;DPB=7;DPRA=0;EPP=3.0103;EPPR=3.44459;GTI=0;LEN=1;MEANALT=1;MQM=37;MQMR=29.8;NS=1;NUMALT=1;ODDS=0.911202;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=75;QR=299;RO=5;RPL=1;RPP=3.0103;RPPR=3.44459;RPR=1;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=4;SRP=6.91895;SRR=1;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:7:5,2:5:299:2:75:-4.38484,0,-11.5688 +phiX174 3184 . A C 0.000128182 . AB=0.153846;ABP=16.5402;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=13;DPB=13;DPRA=0;EPP=3.0103;EPPR=3.20771;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=33.7273;NS=1;NUMALT=1;ODDS=10.4306;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=75;QR=738;RO=11;RPL=1;RPP=3.0103;RPPR=4.78696;RPR=1;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=8;SRP=7.94546;SRR=3;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:13:11,2:11:738:2:75:-0.695464,0,-29.8019 +phiX174 3325 . A C 0.0705175 . AB=0.333333;ABP=4.45795;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=6;DPB=6;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=31;NS=1;NUMALT=1;ODDS=4.11232;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=58;QR=284;RO=4;RPL=2;RPP=7.35324;RPPR=3.0103;RPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=3;SRP=5.18177;SRR=1;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:6:4,2:4:284:2:58:-2.65031,0,-9.66348 +phiX174 3413 . T G 0.00284067 . 
AB=0.181818;ABP=12.6832;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=11;DPB=11;DPRA=0;EPP=3.0103;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=31.6667;NS=1;NUMALT=1;ODDS=7.33194;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=57;QR=537;RO=9;RPL=1;RPP=3.0103;RPPR=3.25157;RPR=1;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=8;SRP=14.8328;SRR=1;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:11:9,2:9:537:2:57:-1.59087,0,-22.4183 +phiX174 3418 . A C 0.000937887 . AB=0.2;ABP=10.8276;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=10;DPB=10;DPRA=0;EPP=7.35324;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=34;NS=1;NUMALT=1;ODDS=8.44033;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=60;QR=526;RO=8;RPL=2;RPP=7.35324;RPPR=4.09604;RPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=5;SRP=4.09604;SRR=3;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:10:8,2:8:526:2:60:-1.49768,0,-21.0207 +phiX174 3490 . A C 0.00226584 . AB=0.181818;ABP=12.6832;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=11;DPB=11;DPRA=0;EPP=7.35324;EPPR=9.04217;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=34.3333;NS=1;NUMALT=1;ODDS=7.5581;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=66;QR=543;RO=9;RPL=2;RPP=7.35324;RPPR=9.04217;RPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=9;SRP=22.5536;SRR=0;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:11:9,2:9:543:2:66:-2.09471,0,-24.3433 +phiX174 3729 . C T 0.295112 . AB=0.333333;ABP=4.45795;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=6;DPB=6;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=37;NS=1;NUMALT=1;ODDS=2.65478;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=63;QR=232;RO=4;RPL=2;RPP=7.35324;RPPR=5.18177;RPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=11.6962;SRR=4;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:6:4,2:4:232:2:63:-3.28331,0,-11.7007 +phiX174 4031 . T G 0.0784807 . 
AB=0.25;ABP=7.35324;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=8;DPB=8;DPRA=0;EPP=7.35324;EPPR=4.45795;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=27;NS=1;NUMALT=1;ODDS=4.00441;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=85;QR=322;RO=6;RPL=2;RPP=7.35324;RPPR=8.80089;RPR=0;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=3;SRP=3.0103;SRR=3;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:8:6,2:6:322:2:85:-3.02817,0,-11.6802 +phiX174 4502 . A C 0.232919 . AB=0.333333;ABP=4.45795;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=6;DPB=6;DPRA=0;EPP=3.0103;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=37;NS=1;NUMALT=1;ODDS=2.89868;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=55;QR=284;RO=4;RPL=1;RPP=3.0103;RPPR=11.6962;RPR=1;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=3;SRP=5.18177;SRR=1;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:6:4,2:4:284:2:55:-2.57533,0,-11.8832 +phiX174 4558 . C G 0.076247 . AB=0.333333;ABP=4.45795;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=6;DPB=6;DPRA=0;EPP=7.35324;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=28;NS=1;NUMALT=1;ODDS=4.03372;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=59;QR=204;RO=4;RPL=2;RPP=7.35324;RPPR=11.6962;RPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=2;SRP=3.0103;SRR=2;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:6:4,2:4:204:2:59:-2.68445,0,-8.4362 +phiX174 4655 . T G 0.399408 . AB=0.333333;ABP=4.45795;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=6;DPB=6;DPRA=0;EPP=3.0103;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=37;NS=1;NUMALT=1;ODDS=2.33999;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=70;QR=193;RO=4;RPL=1;RPP=3.0103;RPPR=3.0103;RPR=1;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=0;SRP=11.6962;SRR=4;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:6:4,2:4:193:2:70:-2.81796,0,-11.1489 +phiX174 4704 . T C 0.000171782 . 
AB=0.166667;ABP=14.5915;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=12;DPB=12;DPRA=0;EPP=7.35324;EPPR=10.8276;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=33.4;NS=1;NUMALT=1;ODDS=10.1378;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=58;QR=621;RO=10;RPL=0;RPP=7.35324;RPPR=3.87889;RPR=2;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=4;SRP=3.87889;SRR=6;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:12:10,2:10:621:2:58:-1.1962,0,-26.2058
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/freebayes-phix174-test2.vcf Fri Aug 14 13:59:45 2020 +0000 @@ -0,0 +1,80 @@ +##fileformat=VCFv4.2 +##fileDate=20191031 +##source=freeBayes v1.3.1-dirty +##reference=localref.fa +##contig=<ID=phiX174,length=5386> +##phasing=none +##commandline="freebayes --region phiX174:0..5386 --bam b_0.bam --fasta-reference localref.fa --vcf ./vcf_output/part_phiX174:0..5386.vcf --min-coverage 14 --skip-coverage 0 --limit-coverage 0 --haplotype-length 0 --min-alternate-count 1 --min-alternate-fraction 0.05 --pooled-continuous --report-monomorphic --standard-filters" +##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> +##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> +##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> +##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> +##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally"> +##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally"> +##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> +##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> +##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations"> 
+##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations"> +##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand"> +##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand"> +##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand"> +##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand"> +##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> +##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome"> +##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> 
+##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele"> +##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele"> +##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without."> +##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best."> +##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout."> +##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> +##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. 
Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."> +##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position."> +##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."> +##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length"> +##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles"> +##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles"> +##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments"> +##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments"> +##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record."> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"> +##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele"> +##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> 
+##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> +##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT unknown +phiX174 1134 . A . 0 . DP=14;DPB=14;EPPR=3.0103;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=916;RO=14;RPPR=8.59409 GT:DP:AD:RO:QR:AO:QA:GL 0/0:14:14:14:916:.:.:0 +phiX174 1135 . C . 0 . DP=14;DPB=14;EPPR=3.63072;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=957;RO=14;RPPR=5.49198 GT:DP:AD:RO:QR:AO:QA:GL 0/0:14:14:14:957:.:.:0 +phiX174 1136 . G . 0 . DP=14;DPB=14;EPPR=8.59409;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=919;RO=14;RPPR=3.0103 GT:DP:AD:RO:QR:AO:QA:GL 0/0:14:14:14:919:.:.:0 +phiX174 1137 . C . 0 . DP=14;DPB=14;EPPR=5.49198;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=927;RO=14;RPPR=3.63072 GT:DP:AD:RO:QR:AO:QA:GL 0/0:14:14:14:927:.:.:0 +phiX174 1138 . C . 0 . DP=14;DPB=14;EPPR=3.63072;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=876;RO=14;RPPR=5.49198 GT:DP:AD:RO:QR:AO:QA:GL 0/0:14:14:14:876:.:.:0 +phiX174 1139 . G . 0 . DP=14;DPB=14;EPPR=3.63072;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=946;RO=14;RPPR=5.49198 GT:DP:AD:RO:QR:AO:QA:GL 0/0:14:14:14:946:.:.:0 +phiX174 1140 . T . 0 . DP=15;DPB=15;EPPR=4.31318;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=927;RO=15;RPPR=4.31318 GT:DP:AD:RO:QR:AO:QA:GL 0/0:15:15:15:927:.:.:0 +phiX174 1141 . T G 3.61953e-07 . 
AB=0;ABP=0;AC=0;AF=0;AN=2;AO=1;CIGAR=1X;DP=15;DPB=15;DPRA=0;EPP=5.18177;EPPR=3.63072;GTI=0;LEN=1;MEANALT=1;MQM=37;MQMR=37;NS=1;NUMALT=1;ODDS=16.3003;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=50;QR=880;RO=14;RPL=0;RPP=5.18177;RPPR=5.49198;RPR=1;RUN=1;SAF=0;SAP=5.18177;SAR=1;SRF=5;SRP=5.49198;SRR=9;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/0:15:14,1:14:880:1:50:0,-0.836684,-42.5367 +phiX174 1142 . G . 0 . DP=16;DPB=16;EPPR=5.18177;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=1082;RO=16;RPPR=3.55317 GT:DP:AD:RO:QR:AO:QA:GL 0/0:16:16:16:1082:.:.:0 +phiX174 1143 . G . 0 . DP=16;DPB=16;EPPR=5.18177;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=1029;RO=16;RPPR=3.55317 GT:DP:AD:RO:QR:AO:QA:GL 0/0:16:16:16:1029:.:.:0 +phiX174 1144 . C . 0 . DP=16;DPB=16;EPPR=5.18177;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=1114;RO=16;RPPR=3.55317 GT:DP:AD:RO:QR:AO:QA:GL 0/0:16:16:16:1114:.:.:0 +phiX174 1145 . G . 0 . DP=15;DPB=15;EPPR=6.62942;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=1042;RO=15;RPPR=3.15506 GT:DP:AD:RO:QR:AO:QA:GL 0/0:15:15:15:1042:.:.:0 +phiX174 1146 . C . 0 . DP=15;DPB=15;EPPR=6.62942;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=1058;RO=15;RPPR=3.15506 GT:DP:AD:RO:QR:AO:QA:GL 0/0:15:15:15:1058:.:.:0 +phiX174 1147 . T . 0 . DP=15;DPB=15;EPPR=4.31318;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=999;RO=15;RPPR=4.31318 GT:DP:AD:RO:QR:AO:QA:GL 0/0:15:15:15:999:.:.:0 +phiX174 1148 . C . 0 . DP=15;DPB=15;EPPR=4.31318;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=1003;RO=15;RPPR=4.31318 GT:DP:AD:RO:QR:AO:QA:GL 0/0:15:15:15:1003:.:.:0 +phiX174 1149 . T C 4.04114e-07 . 
AB=0;ABP=0;AC=0;AF=0;AN=2;AO=1;CIGAR=1X;DP=14;DPB=14;DPRA=0;EPP=5.18177;EPPR=3.17734;GTI=0;LEN=1;MEANALT=1;MQM=37;MQMR=37;NS=1;NUMALT=1;ODDS=16.1901;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=33;QR=830;RO=13;RPL=0;RPP=5.18177;RPPR=7.18621;RPR=1;RUN=1;SAF=0;SAP=5.18177;SAR=1;SRF=5;SRP=4.51363;SRR=8;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/0:14:13,1:13:830:1:33:0,-1.0599,-39.8344 +phiX174 1151 . C T 3.45599e-08 . AB=0;ABP=0;AC=0;AF=0;AN=2;AO=1;CIGAR=1X;DP=15;DPB=15;DPRA=0;EPP=5.18177;EPPR=8.59409;GTI=0;LEN=1;MEANALT=1;MQM=37;MQMR=37;NS=1;NUMALT=1;ODDS=18.6491;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=27;QR=946;RO=14;RPL=1;RPP=5.18177;RPPR=3.0103;RPR=0;RUN=1;SAF=1;SAP=5.18177;SAR=0;SRF=3;SRP=12.937;SRR=11;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/0:15:14,1:14:946:1:27:0,-1.85676,-43.833 +phiX174 1152 . G . 0 . DP=16;DPB=16;EPPR=5.18177;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=993;RO=16;RPPR=5.18177 GT:DP:AD:RO:QR:AO:QA:GL 0/0:16:16:16:993:.:.:0 +phiX174 1153 . T . 3.38668e-15 . DP=15;DPB=15;EPPR=4.31318;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=880;RO=15;RPPR=4.31318 GT:DP:AD:RO:QR:AO:QA:GL 0/0:15:15:15:880:.:.:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/freebayes-phix174-test3.vcf Fri Aug 14 13:59:45 2020 +0000 @@ -0,0 +1,80 @@ +##fileformat=VCFv4.2 +##fileDate=20191031 +##source=freeBayes v1.3.1-dirty +##reference=localref.fa +##contig=<ID=phiX174,length=5386> +##phasing=none +##commandline="freebayes --region phiX174:0..5386 --bam b_0.bam --fasta-reference localref.fa --vcf ./vcf_output/part_phiX174:0..5386.vcf --min-coverage 14 --skip-coverage 0 --limit-coverage 0 --haplotype-length 0 --min-alternate-count 1 --min-alternate-fraction 0.05 --pooled-continuous --report-monomorphic --standard-filters" +##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> +##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> +##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> +##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> +##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally"> +##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally"> +##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> +##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> +##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations"> 
+##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations"> +##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand"> +##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand"> +##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand"> +##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand"> +##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> +##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome"> +##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> 
+##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele"> +##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele"> +##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without."> +##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best."> +##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout."> +##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> +##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. 
Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."> +##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position."> +##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."> +##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length"> +##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles"> +##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles"> +##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments"> +##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments"> +##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record."> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"> +##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele"> +##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> 
+##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> +##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT unknown +phiX174 1134 . A . 0 . DP=14;DPB=14;EPPR=3.0103;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=916;RO=14;RPPR=8.59409 GT:DP:AD:RO:QR:AO:QA:GL 0/0:14:14:14:916:.:.:0 +phiX174 1135 . C . 0 . DP=14;DPB=14;EPPR=3.63072;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=957;RO=14;RPPR=5.49198 GT:DP:AD:RO:QR:AO:QA:GL 0/0:14:14:14:957:.:.:0 +phiX174 1136 . G . 0 . DP=14;DPB=14;EPPR=8.59409;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=919;RO=14;RPPR=3.0103 GT:DP:AD:RO:QR:AO:QA:GL 0/0:14:14:14:919:.:.:0 +phiX174 1137 . C . 0 . DP=14;DPB=14;EPPR=5.49198;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=927;RO=14;RPPR=3.63072 GT:DP:AD:RO:QR:AO:QA:GL 0/0:14:14:14:927:.:.:0 +phiX174 1138 . C . 0 . DP=14;DPB=14;EPPR=3.63072;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=876;RO=14;RPPR=5.49198 GT:DP:AD:RO:QR:AO:QA:GL 0/0:14:14:14:876:.:.:0 +phiX174 1139 . G . 0 . DP=14;DPB=14;EPPR=3.63072;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=946;RO=14;RPPR=5.49198 GT:DP:AD:RO:QR:AO:QA:GL 0/0:14:14:14:946:.:.:0 +phiX174 1140 . T . 0 . DP=15;DPB=15;EPPR=4.31318;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=927;RO=15;RPPR=4.31318 GT:DP:AD:RO:QR:AO:QA:GL 0/0:15:15:15:927:.:.:0 +phiX174 1141 . T G 3.61953e-07 . 
AB=0;ABP=0;AC=0;AF=0;AN=2;AO=1;CIGAR=1X;DP=15;DPB=15;DPRA=0;EPP=5.18177;EPPR=3.63072;GTI=0;LEN=1;MEANALT=1;MQM=37;MQMR=37;NS=1;NUMALT=1;ODDS=16.3003;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=50;QR=880;RO=14;RPL=0;RPP=5.18177;RPPR=5.49198;RPR=1;RUN=1;SAF=0;SAP=5.18177;SAR=1;SRF=5;SRP=5.49198;SRR=9;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/0:15:14,1:14:880:1:50:0,-0.836684,-42.5367 +phiX174 1142 . G . 0 . DP=16;DPB=16;EPPR=5.18177;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=1082;RO=16;RPPR=3.55317 GT:DP:AD:RO:QR:AO:QA:GL 0/0:16:16:16:1082:.:.:0 +phiX174 1143 . G . 0 . DP=16;DPB=16;EPPR=5.18177;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=1029;RO=16;RPPR=3.55317 GT:DP:AD:RO:QR:AO:QA:GL 0/0:16:16:16:1029:.:.:0 +phiX174 1144 . C . 0 . DP=16;DPB=16;EPPR=5.18177;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=1114;RO=16;RPPR=3.55317 GT:DP:AD:RO:QR:AO:QA:GL 0/0:16:16:16:1114:.:.:0 +phiX174 1145 . G . 0 . DP=15;DPB=15;EPPR=6.62942;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=1042;RO=15;RPPR=3.15506 GT:DP:AD:RO:QR:AO:QA:GL 0/0:15:15:15:1042:.:.:0 +phiX174 1146 . C . 0 . DP=15;DPB=15;EPPR=6.62942;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=1058;RO=15;RPPR=3.15506 GT:DP:AD:RO:QR:AO:QA:GL 0/0:15:15:15:1058:.:.:0 +phiX174 1147 . T . 0 . DP=15;DPB=15;EPPR=4.31318;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=999;RO=15;RPPR=4.31318 GT:DP:AD:RO:QR:AO:QA:GL 0/0:15:15:15:999:.:.:0 +phiX174 1148 . C . 0 . DP=15;DPB=15;EPPR=4.31318;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=1003;RO=15;RPPR=4.31318 GT:DP:AD:RO:QR:AO:QA:GL 0/0:15:15:15:1003:.:.:0 +phiX174 1149 . T C 4.04114e-07 . 
AB=0;ABP=0;AC=0;AF=0;AN=2;AO=1;CIGAR=1X;DP=14;DPB=14;DPRA=0;EPP=5.18177;EPPR=3.17734;GTI=0;LEN=1;MEANALT=1;MQM=37;MQMR=37;NS=1;NUMALT=1;ODDS=16.1901;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=33;QR=830;RO=13;RPL=0;RPP=5.18177;RPPR=7.18621;RPR=1;RUN=1;SAF=0;SAP=5.18177;SAR=1;SRF=5;SRP=4.51363;SRR=8;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/0:14:13,1:13:830:1:33:0,-1.0599,-39.8344 +phiX174 1151 . C T 3.45599e-08 . AB=0;ABP=0;AC=0;AF=0;AN=2;AO=1;CIGAR=1X;DP=15;DPB=15;DPRA=0;EPP=5.18177;EPPR=8.59409;GTI=0;LEN=1;MEANALT=1;MQM=37;MQMR=37;NS=1;NUMALT=1;ODDS=18.6491;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=27;QR=946;RO=14;RPL=1;RPP=5.18177;RPPR=3.0103;RPR=0;RUN=1;SAF=1;SAP=5.18177;SAR=0;SRF=3;SRP=12.937;SRR=11;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/0:15:14,1:14:946:1:27:0,-1.85676,-43.833 +phiX174 1152 . G . 0 . DP=16;DPB=16;EPPR=5.18177;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=993;RO=16;RPPR=5.18177 GT:DP:AD:RO:QR:AO:QA:GL 0/0:16:16:16:993:.:.:0 +phiX174 1153 . T . 3.38668e-15 . DP=15;DPB=15;EPPR=4.31318;GTI=0;MQMR=37;NS=1;NUMALT=0;ODDS=0;PAIREDR=0;PQR=0;PRO=0;QR=880;RO=15;RPPR=4.31318 GT:DP:AD:RO:QR:AO:QA:GL 0/0:15:15:15:880:.:.:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/freebayes-phix174-test4.vcf Fri Aug 14 13:59:45 2020 +0000 @@ -0,0 +1,93 @@ +##fileformat=VCFv4.2 +##fileDate=20191030 +##source=freeBayes v1.3.1-dirty +##reference=localref.fa +##contig=<ID=phiX174,length=5386> +##phasing=none +##commandline="freebayes --region phiX174:0..5386 --bam b_0.bam --fasta-reference localref.fa --vcf ./vcf_output/part_phiX174:0..5386.vcf --theta 0.001 --ploidy 1" +##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> +##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> +##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> +##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> +##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally"> +##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally"> +##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> +##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> +##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations"> +##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations"> +##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of 
reference observations on the forward strand"> +##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand"> +##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand"> +##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand"> +##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> +##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome"> +##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between 
RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele"> +##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele"> +##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without."> +##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best."> +##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout."> +##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> +##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. 
Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."> +##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position."> +##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."> +##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length"> +##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles"> +##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles"> +##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments"> +##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments"> +##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record."> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"> +##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele"> +##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> 
+##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> +##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT unknown +phiX174 311 . A G 5.72946e-15 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=9;DPB=9;DPRA=0;EPP=7.35324;EPPR=5.80219;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=33.5714;NS=1;NUMALT=1;ODDS=38.0687;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=70;QR=478;RO=7;RPL=0;RPP=7.35324;RPPR=3.32051;RPR=2;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=5;SRP=5.80219;SRR=2;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:9:7,2:7:478:2:70:0,-16.86 +phiX174 374 . T G 22.5297 . AB=0;ABP=0;AC=1;AF=1;AN=1;AO=2;CIGAR=1X;DP=4;DPB=4;DPRA=0;EPP=7.35324;EPPR=7.35324;GTI=0;LEN=1;MEANALT=1;MQM=37;MQMR=25;NS=1;NUMALT=1;ODDS=5.18205;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=75;QR=141;RO=2;RPL=1;RPP=3.0103;RPPR=7.35324;RPR=1;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=0;SRP=7.35324;SRR=2;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 1:4:2,2:2:141:2:75:-1.64847,0 +phiX174 913 . A C 0.0178937 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=5;DPB=5;DPRA=0;EPP=3.0103;EPPR=3.73412;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=29;NS=1;NUMALT=1;ODDS=5.4898;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=67;QR=171;RO=3;RPL=1;RPP=3.0103;RPPR=3.73412;RPR=1;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=3;SRP=9.52472;SRR=0;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:5:3,2:3:171:2:67:0,-2.9351 +phiX174 1205 . A C 6.98431e-15 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=10;DPB=10;DPRA=0;EPP=7.35324;EPPR=7.35324;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=34;NS=1;NUMALT=1;ODDS=41.7843;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=67;QR=456;RO=8;RPL=2;RPP=7.35324;RPPR=7.35324;RPR=0;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=2;SRP=7.35324;SRR=6;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:10:8,2:8:456:2:67:0,-19.2238 +phiX174 1245 . G T 2.17569e-10 . 
AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=9;DPB=9;DPRA=0;EPP=7.35324;EPPR=3.32051;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=28.4286;NS=1;NUMALT=1;ODDS=23.7171;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=70;QR=389;RO=7;RPL=2;RPP=7.35324;RPPR=3.32051;RPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=18.2106;SRR=7;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:9:7,2:7:389:2:70:0,-11.7275 +phiX174 1249 . T G 9.08243e-14 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=9;DPB=9;DPRA=0;EPP=3.0103;EPPR=3.32051;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=30.1429;NS=1;NUMALT=1;ODDS=31.5341;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=74;QR=464;RO=7;RPL=1;RPP=3.0103;RPPR=5.80219;RPR=1;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=2;SRP=5.80219;SRR=5;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:9:7,2:7:464:2:74:0,-14.6241 +phiX174 1445 . C A 3.85961e-09 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=7;DPB=7;DPRA=0;EPP=7.35324;EPPR=6.91895;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=32.2;NS=1;NUMALT=1;ODDS=20.8413;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=76;QR=273;RO=5;RPL=1;RPP=3.0103;RPPR=6.91895;RPR=1;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=2;SRP=3.44459;SRR=3;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:7:5,2:5:273:2:76:0,-9.96463 +phiX174 1577 . A C 7.60962e-15 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=9;DPB=9;DPRA=0;EPP=7.35324;EPPR=3.32051;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=35.2857;NS=1;NUMALT=1;ODDS=38.7543;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=60;QR=460;RO=7;RPL=1;RPP=3.0103;RPPR=10.7656;RPR=1;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=4;SRP=3.32051;SRR=3;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:9:7,2:7:460:2:60:0,-18.0151 +phiX174 1631 . T G 8.14828e-15 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=10;DPB=10;DPRA=0;EPP=7.35324;EPPR=4.09604;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=28;NS=1;NUMALT=1;ODDS=36.3199;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=68;QR=500;RO=8;RPL=0;RPP=7.35324;RPPR=3.0103;RPR=2;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=3;SRP=4.09604;SRR=5;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:10:8,2:8:500:2:68:0,-15.8506 +phiX174 1665 . C A 1.45784e-14 . 
AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=12;DPB=12;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=2;MQM=37;MQMR=30.3333;NS=1;NUMALT=1;ODDS=41.3105;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=65;QR=587;RO=9;RPL=1;RPP=3.0103;RPPR=5.18177;RPR=1;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=4;SRP=3.25157;SRR=5;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:12:9,2:9:587:2:65:0,-18.9157 +phiX174 1772 . T G 2.89903e-14 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=10;DPB=10;DPRA=0;EPP=3.0103;EPPR=3.32051;GTI=0;LEN=1;MEANALT=2;MQM=31;MQMR=31.8571;NS=1;NUMALT=1;ODDS=32.7439;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=59;QR=425;RO=7;RPL=1;RPP=3.0103;RPPR=3.32051;RPR=1;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=1;SRP=10.7656;SRR=6;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:10:7,2:7:425:2:59:0,-15.4048 +phiX174 1786 . T G 0 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=12;DPB=12;DPRA=0;EPP=7.35324;EPPR=3.87889;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=33.4;NS=1;NUMALT=1;ODDS=51.6968;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=59;QR=537;RO=10;RPL=0;RPP=7.35324;RPPR=6.48466;RPR=2;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=1;SRP=16.9077;SRR=9;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:12:10,2:10:537:2:59:0,-24.2749 +phiX174 1945 . T G 2.84647e-08 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=6;DPB=6;DPRA=0;EPP=3.0103;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=37;NS=1;NUMALT=1;ODDS=18.8432;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=59;QR=263;RO=4;RPL=1;RPP=3.0103;RPPR=5.18177;RPR=1;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=2;SRP=3.0103;SRR=2;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:6:4,2:4:263:2:59:0,-8.60945 +phiX174 2230 . T G 1.46203e-15 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=10;DPB=10;DPRA=0;EPP=7.35324;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=35.5;NS=1;NUMALT=1;ODDS=46.0348;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=80;QR=491;RO=8;RPL=0;RPP=7.35324;RPPR=4.09604;RPR=2;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=3;SRP=4.09604;SRR=5;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:10:8,2:8:491:2:80:0,-20.0697 +phiX174 2699 . C A 0.0286554 . 
AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=4;DPB=4;DPRA=0;EPP=7.35324;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=37;NS=1;NUMALT=1;ODDS=5.01766;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=76;QR=109;RO=2;RPL=1;RPP=3.0103;RPPR=3.0103;RPR=1;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=0;SRP=7.35324;SRR=2;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:4:2,2:2:109:2:76:0,-2.17914 +phiX174 2722 . T G 0.000497664 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=5;DPB=5;DPRA=0;EPP=7.35324;EPPR=3.73412;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=33;NS=1;NUMALT=1;ODDS=9.07408;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=67;QR=150;RO=3;RPL=1;RPP=3.0103;RPPR=3.73412;RPR=1;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=0;SRP=9.52472;SRR=3;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:5:3,2:3:150:2:67:0,-4.49173 +phiX174 2814 . T G 0.00240428 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=5;DPB=5;DPRA=0;EPP=3.0103;EPPR=3.73412;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=33;NS=1;NUMALT=1;ODDS=7.49878;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=57;QR=135;RO=3;RPL=1;RPP=3.0103;RPPR=3.73412;RPR=1;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=0;SRP=9.52472;SRR=3;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:5:3,2:3:135:2:57:0,-3.80759 +phiX174 2828 . T G 4.53266e-10 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=8;DPB=8;DPRA=0;EPP=7.35324;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=31;NS=1;NUMALT=1;ODDS=22.9831;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=100;QR=348;RO=6;RPL=0;RPP=7.35324;RPPR=4.45795;RPR=2;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=1;SRP=8.80089;SRR=5;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:8:6,2:6:348:2:100:0,-10.3385 +phiX174 2983 . T G 0.000276493 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=5;DPB=5;DPRA=0;EPP=3.0103;EPPR=3.73412;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=33;NS=1;NUMALT=1;ODDS=9.66185;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=57;QR=150;RO=3;RPL=1;RPP=3.0103;RPPR=3.73412;RPR=1;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=2;SRP=3.73412;SRR=1;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:5:3,2:3:150:2:57:0,-4.26987 +phiX174 3110 . T C 4.70002e-12 . 
AB=0;ABP=0;AC=0;AF=0;AN=1;AO=3;CIGAR=1X;DP=9;DPB=9;DPRA=0;EPP=3.73412;EPPR=4.45795;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=35;NS=1;NUMALT=1;ODDS=27.5529;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=94;QR=361;RO=6;RPL=0;RPP=9.52472;RPPR=3.0103;RPR=3;RUN=1;SAF=1;SAP=3.73412;SAR=2;SRF=1;SRP=8.80089;SRR=5;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:9:6,3:6:361:3:94:0,-12.3743 +phiX174 3155 . T G 1.16474e-06 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=7;DPB=7;DPRA=0;EPP=3.0103;EPPR=3.44459;GTI=0;LEN=1;MEANALT=1;MQM=37;MQMR=29.8;NS=1;NUMALT=1;ODDS=15.1316;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=75;QR=299;RO=5;RPL=1;RPP=3.0103;RPPR=3.44459;RPR=1;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=4;SRP=6.91895;SRR=1;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:7:5,2:5:299:2:75:0,-7.18392 +phiX174 3184 . A C 0 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=13;DPB=13;DPRA=0;EPP=3.0103;EPPR=3.20771;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=33.7273;NS=1;NUMALT=1;ODDS=63.9593;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=75;QR=738;RO=11;RPL=1;RPP=3.0103;RPPR=4.78696;RPR=1;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=8;SRP=7.94546;SRR=3;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:13:11,2:11:738:2:75:0,-29.1064 +phiX174 3325 . A C 2.80881e-07 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=6;DPB=6;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=31;NS=1;NUMALT=1;ODDS=16.5539;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=58;QR=284;RO=4;RPL=2;RPP=7.35324;RPPR=3.0103;RPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=3;SRP=5.18177;SRR=1;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:6:4,2:4:284:2:58:0,-7.01317 +phiX174 3413 . T G 6.16289e-15 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=11;DPB=11;DPRA=0;EPP=3.0103;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=31.6667;NS=1;NUMALT=1;ODDS=43.4788;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=57;QR=537;RO=9;RPL=1;RPP=3.0103;RPPR=3.25157;RPR=1;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=8;SRP=14.8328;SRR=1;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:11:9,2:9:537:2:57:0,-20.8274 +phiX174 3418 . A C 0 . 
AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=10;DPB=10;DPRA=0;EPP=7.35324;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=34;NS=1;NUMALT=1;ODDS=44.7759;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=60;QR=526;RO=8;RPL=2;RPP=7.35324;RPPR=4.09604;RPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=5;SRP=4.09604;SRR=3;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:10:8,2:8:526:2:60:0,-19.523 +phiX174 3490 . A C 0 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=11;DPB=11;DPRA=0;EPP=7.35324;EPPR=9.04217;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=34.3333;NS=1;NUMALT=1;ODDS=43.8402;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=66;QR=543;RO=9;RPL=2;RPP=7.35324;RPPR=9.04217;RPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=9;SRP=22.5536;SRR=0;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:11:9,2:9:543:2:66:0,-22.2486 +phiX174 3729 . C T 6.64464e-08 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=6;DPB=6;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=37;NS=1;NUMALT=1;ODDS=17.9954;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=63;QR=232;RO=4;RPL=2;RPP=7.35324;RPPR=5.18177;RPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=11.6962;SRR=4;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:6:4,2:4:232:2:63:0,-8.41737 +phiX174 4031 . T G 2.20192e-08 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=8;DPB=8;DPRA=0;EPP=7.35324;EPPR=4.45795;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=27;NS=1;NUMALT=1;ODDS=19.0999;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=85;QR=322;RO=6;RPL=2;RPP=7.35324;RPPR=8.80089;RPR=0;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=3;SRP=3.0103;SRR=3;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:8:6,2:6:322:2:85:0,-8.65207 +phiX174 4502 . A C 3.4202e-08 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=6;DPB=6;DPRA=0;EPP=3.0103;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=37;NS=1;NUMALT=1;ODDS=18.6595;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=55;QR=284;RO=4;RPL=1;RPP=3.0103;RPPR=11.6962;RPR=1;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=3;SRP=5.18177;SRR=1;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:6:4,2:4:284:2:55:0,-9.30785 +phiX174 4558 . C G 1.36745e-05 . 
AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=6;DPB=6;DPRA=0;EPP=7.35324;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=28;NS=1;NUMALT=1;ODDS=12.6685;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=59;QR=204;RO=4;RPL=2;RPP=7.35324;RPPR=11.6962;RPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=2;SRP=3.0103;SRR=2;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:6:4,2:4:204:2:59:0,-5.75175 +phiX174 4655 . T G 1.44134e-07 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=6;DPB=6;DPRA=0;EPP=3.0103;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=25;MQMR=37;NS=1;NUMALT=1;ODDS=17.2211;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=70;QR=193;RO=4;RPL=1;RPP=3.0103;RPPR=3.0103;RPR=1;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=0;SRP=11.6962;SRR=4;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:6:4,2:4:193:2:70:0,-8.33095 +phiX174 4704 . T C 1.08534e-15 . AB=0;ABP=0;AC=0;AF=0;AN=1;AO=2;CIGAR=1X;DP=12;DPB=12;DPRA=0;EPP=7.35324;EPPR=10.8276;GTI=0;LEN=1;MEANALT=1;MQM=31;MQMR=33.4;NS=1;NUMALT=1;ODDS=55.4522;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=58;QR=621;RO=10;RPL=0;RPP=7.35324;RPPR=3.87889;RPR=2;RUN=1;SAF=0;SAP=7.35324;SAR=2;SRF=4;SRP=3.87889;SRR=6;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0:12:10,2:10:621:2:58:0,-25.0096
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/freebayes-phix174.fasta Fri Aug 14 13:59:45 2020 +0000 @@ -0,0 +1,2 @@ +>phiX174 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTTGATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAAATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTGTCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTAGATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATCTGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTTTCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTTCGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCTTGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCGTCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTACGGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTACGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCaGAAGGAGTGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACTAAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGCCCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCATCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGACTCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTTGGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACAACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGCTCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTTTCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGCATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCgTGATGTTATTTCTTCATTTGGAGGTAAAACCTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGCCGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTAC
TCTTGCGCTTGTTCGTTTTCCGCCTACTGCGACTAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTGTATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGTTTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGAAGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGCTTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGTTCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTGTCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGCCTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTGAATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGCCGGGCAATAAtGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGTTTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTGCTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCTGGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTGGTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGAGATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGACCAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTATGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCAAACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGACTTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTTCTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGATACTTGG
AACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCGTCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTTCTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTATTGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGCATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATGTTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGAATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCCCTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATTGCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTaCTATTCAGCGTTTGATGAATGCAATGCGACAGGCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTTATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCGCAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGCCGTCTTCATTTCCATGCGGTGCAtTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTCGTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCATCGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAGCCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATATGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACTTCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTGTCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGCAGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACCTGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/leftalign.fa Fri Aug 14 13:59:45 2020 +0000 @@ -0,0 +1,2 @@ +>phiX174 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTTGATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAAATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTGTCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTAGATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATCTGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTTTCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTTCGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCTTGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCGTCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTACGGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTACGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCaGAAGGAGTGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACTAAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGCCCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCATCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGACTCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTTGGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACAACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGCTCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTTTCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGCATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCgTGATGTTATTTCTTCATTTGGAGGTAAAACCTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGCCGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTG
TTCGTTTTCCGCCTACTGCGACTAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTGTATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGTTTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGAAGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGCTTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGTTCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTGTCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGCCTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTGAATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGCCGGGCAATAAtGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGTTTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTGCTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCTGGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTGGTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGAGATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGACCAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTATGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCAAACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGACTTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTTCTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGATACTTGGAACAATTTCTG
GAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCGTCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTTCTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTATTGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGCATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATGTTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGAATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCCCTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATTGCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTaCTATTCAGCGTTTGATGAATGCAATGCGACAGGCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTTATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCGCAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGCCGTCTTCATTTCCATGCGGTGCAtTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTCGTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCATCGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAGCCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATATGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACTTCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTGTCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGCAGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACCTGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/fasta_indexes.loc.sample Fri Aug 14 13:59:45 2020 +0000 @@ -0,0 +1,29 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools-indexed sequence data files. You will need +#to create these data files and then create a fasta_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The fasta_indexes.loc +#file has this format (white space characters are TAB characters): +# +# <unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had the hg19 Canonical index stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the fasta_indexes.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your fasta_indexes.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Fri Aug 14 13:59:45 2020 +0000 @@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Location of SAMTools indexes for FASTA files --> + <table name="fasta_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/fasta_indexes.loc" /> + </table> +</tables>