# HG changeset patch
# User devteam
# Date 1386794479 18000
# Node ID e21073b0dc1f1ee12e54cef96589124b40e2c7fe
Uploaded initial revision.
diff -r 000000000000 -r e21073b0dc1f freebayes.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/freebayes.xml Wed Dec 11 15:41:19 2013 -0500
@@ -0,0 +1,676 @@
+
+
+
+ freebayes
+ samtools
+
+ - Bayesian genetic variant detector
+
+ ##set up input files: a history reference is symlinked to localref.fa and indexed with samtools faidx; otherwise its path is read from ref_file.fields.path
+ #set $reference_fasta_filename = "localref.fa"
+ #if str( $reference_source.reference_source_selector ) == "history":
+ ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &&
+ samtools faidx "${reference_fasta_filename}" 2>&1 || echo "Error running samtools faidx for FreeBayes" >&2 &&
+ #else:
+ #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
+ #end if
+ #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
+ ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" &&
+ ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &&
+ #end for
+ ##finished setting up inputs: each BAM and its index are now linked as localbam_N.bam and localbam_N.bam.bai
+
+ ##start FreeBayes commandline: one --bam argument per linked input, followed by the reference chosen during setup
+ freebayes
+ #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
+ --bam "localbam_${bam_count}.bam"
+ #end for
+ --fasta-reference "${reference_fasta_filename}"
+
+ ##outputs: the VCF results file is always written
+ --vcf "${output_vcf}"
+
+ ##advanced options: everything up to the final end-if is emitted only when options_type_selector is "advanced"
+ #if str( $options_type.options_type_selector ) == "advanced":
+ ##additional outputs: trace and failed-alleles files, each requested by its own option
+ #if $options_type.output_trace_option:
+ --trace "${output_trace}"
+ #end if
+ #if $options_type.output_failed_alleles_option:
+ --failed-alleles "${output_failed_alleles_bed}"
+ #end if
+
+ ##additional inputs: target BED or region limit, then optional samples, populations, CNV map, variant VCF and haplotype-basis VCF
+ #if str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_target_file":
+ --targets "${options_type.target_limit_type.input_target_bed}"
+ #elif str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_region":
+ --region "${options_type.target_limit_type.region_chromosome}:${options_type.target_limit_type.region_start}..${options_type.target_limit_type.region_end}"
+ #end if
+ #if $options_type.input_sample_file:
+ --samples "${options_type.input_sample_file}"
+ #end if
+ #if $options_type.input_populations_file:
+ --populations "${options_type.input_populations_file}"
+ #end if
+ #if $options_type.input_cnv_map_bed:
+ --cnv-map "${options_type.input_cnv_map_bed}"
+ #end if
+ #if str( $options_type.input_variant_type.input_variant_type_selector ) == "provide_vcf":
+ --variant-input "${options_type.input_variant_type.input_variant_vcf}"
+ ${options_type.input_variant_type.only_use_input_alleles}
+ #end if
+ #if $options_type.haplotype_basis_alleles:
+ --haplotype-basis-alleles "${options_type.haplotype_basis_alleles}"
+ #end if
+
+
+ ##reporting: emitted only when this section's selector is "set"
+ #if str( $options_type.section_reporting_type.section_reporting_type_selector ) == "set":
+ --pvar "${options_type.section_reporting_type.pvar}"
+ ${options_type.section_reporting_type.show_reference_repeats}
+ #end if
+
+ ##population model: theta, ploidy and the pooled setting, only when the selector is "set"
+ #if str( $options_type.section_population_model_type.section_population_model_type_selector ) == "set":
+ --theta "${options_type.section_population_model_type.theta}"
+ --ploidy "${options_type.section_population_model_type.ploidy}"
+ ${options_type.section_population_model_type.pooled}
+ #end if
+
+ ##reference allele: adds --use-reference-allele and a "MQ,BQ" reference quality pair when selected
+ #if str( $options_type.use_reference_allele_type.use_reference_allele_type_selector ) == "include_reference_allele":
+ --use-reference-allele
+ ${options_type.use_reference_allele_type.diploid_reference}
+ --reference-quality "${options_type.use_reference_allele_type.reference_quality_mq},${options_type.use_reference_allele_type.reference_quality_bq}"
+ #end if
+
+ ##allele scope: allele-class switches and --use-best-n-alleles; --max-complex-gap only when a value is given
+ #if str( $options_type.section_allele_scope_type.section_allele_scope_type_selector ) == "set":
+ ${options_type.section_allele_scope_type.no_snps}
+ ${options_type.section_allele_scope_type.no_indels}
+ ${options_type.section_allele_scope_type.no_mnps}
+ ${options_type.section_allele_scope_type.no_complex}
+ --use-best-n-alleles "${options_type.section_allele_scope_type.use_best_n_alleles}"
+ #if $options_type.section_allele_scope_type.max_complex_gap:
+ --max-complex-gap "${options_type.section_allele_scope_type.max_complex_gap}"
+ #end if
+ #end if
+
+ ##indel realignment: not gated by a section selector, so it is emitted whenever advanced options are on
+ ${options_type.left_align_indels}
+
+ ##input filters: quality filters are either explicit thresholds or --standard-filters; the read limits are optional
+ #if str( $options_type.section_input_filters_type.section_input_filters_type_selector ) == "set":
+ ${options_type.section_input_filters_type.use_duplicate_reads}
+ #if str( $options_type.section_input_filters_type.quality_filter_type.quality_filter_type_selector ) == "apply_filters":
+ --min-mapping-quality "${options_type.section_input_filters_type.quality_filter_type.min_mapping_quality}"
+ --min-base-quality "${options_type.section_input_filters_type.quality_filter_type.min_base_quality}"
+ --min-supporting-quality "${options_type.section_input_filters_type.quality_filter_type.min_supporting_quality_mq},${options_type.section_input_filters_type.quality_filter_type.min_supporting_quality_bq}"
+ #elif str( $options_type.section_input_filters_type.quality_filter_type.quality_filter_type_selector ) == "standard_filters":
+ --standard-filters
+ #end if
+ --mismatch-base-quality-threshold "${options_type.section_input_filters_type.mismatch_base_quality_threshold}"
+ #if $options_type.section_input_filters_type.read_mismatch_limit:
+ --read-mismatch-limit "${options_type.section_input_filters_type.read_mismatch_limit}"
+ #end if
+ --read-max-mismatch-fraction "${options_type.section_input_filters_type.read_max_mismatch_fraction}"
+ #if $options_type.section_input_filters_type.read_snp_limit:
+ --read-snp-limit "${options_type.section_input_filters_type.read_snp_limit}"
+ #end if
+ #if $options_type.section_input_filters_type.read_indel_limit:
+ --read-indel-limit "${options_type.section_input_filters_type.read_indel_limit}"
+ #end if
+ --indel-exclusion-window "${options_type.section_input_filters_type.indel_exclusion_window}"
+ --min-alternate-fraction "${options_type.section_input_filters_type.min_alternate_fraction}"
+ --min-alternate-count "${options_type.section_input_filters_type.min_alternate_count}"
+ --min-alternate-qsum "${options_type.section_input_filters_type.min_alternate_qsum}"
+ --min-alternate-total "${options_type.section_input_filters_type.min_alternate_total}"
+ --min-coverage "${options_type.section_input_filters_type.min_coverage}"
+ #end if
+
+ ##bayesian priors: three prior settings, only when the selector is "set"
+ #if str( $options_type.section_bayesian_priors_type.section_bayesian_priors_type_selector ) == "set":
+ ${options_type.section_bayesian_priors_type.no_ewens_priors}
+ ${options_type.section_bayesian_priors_type.no_population_priors}
+ ${options_type.section_bayesian_priors_type.hwe_priors}
+ #end if
+
+ ##observation prior expectations: two prior settings, only when the selector is "set"
+ #if str( $options_type.section_observation_prior_expectations_type.section_observation_prior_expectations_type_selector ) == "set":
+ ${options_type.section_observation_prior_expectations_type.binomial_obs_priors}
+ ${options_type.section_observation_prior_expectations_type.allele_balance_priors}
+ #end if
+
+ ##algorithmic features: iteration limits and "N,M" integration limits; --genotype-variant-threshold only when a value is given
+ #if str( $options_type.section_algorithmic_features_type.section_algorithmic_features_type_selector ) == "set":
+ --site-selection-max-iterations "${options_type.section_algorithmic_features_type.site_selection_max_iterations}"
+ --genotyping-max-iterations "${options_type.section_algorithmic_features_type.genotyping_max_iterations}"
+ --genotyping-max-banddepth "${options_type.section_algorithmic_features_type.genotyping_max_banddepth}"
+ --posterior-integration-limits "${options_type.section_algorithmic_features_type.posterior_integration_limits_n},${options_type.section_algorithmic_features_type.posterior_integration_limits_m}"
+ ${options_type.section_algorithmic_features_type.no_permute}
+ ${options_type.section_algorithmic_features_type.exclude_unobserved_genotypes}
+ #if $options_type.section_algorithmic_features_type.genotype_variant_threshold:
+ --genotype-variant-threshold "${options_type.section_algorithmic_features_type.genotype_variant_threshold}"
+ #end if
+ ${options_type.section_algorithmic_features_type.use_mapping_quality}
+ --read-dependence-factor "${options_type.section_algorithmic_features_type.read_dependence_factor}"
+ ${options_type.section_algorithmic_features_type.no_marginals}
+ #end if
+
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ options_type['options_type_selector'] == "advanced" and options_type['output_failed_alleles_option'] is True
+
+
+ options_type['options_type_selector'] == "advanced" and options_type['output_trace_option'] is True
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool uses FreeBayes to call SNPs given a reference sequence and a BAM alignment file.
+
+FreeBayes is a high-performance, flexible, and open-source Bayesian genetic variant detector. It operates on BAM alignment files, which are produced by most contemporary short-read aligners.
+
+In addition to substantial performance improvements over its predecessors (PolyBayes, GigaBayes, and BamBayes), it expands the scope of SNP and small-indel variant calling to populations of individuals with heterogeneous copy number. FreeBayes is currently under active development.
+
+Go `here <http://bioinformatics.bc.edu/marthlab/FreeBayes>`_ for details on FreeBayes.
+
+------
+
+**Inputs**
+
+FreeBayes accepts one or more aligned BAM files as input, together with a reference sequence.
+
+
+**Outputs**
+
+The output is in the VCF format.
+
+-------
+
+**Settings**::
+
+ input and output:
+
+ -b --bam FILE Add FILE to the set of BAM files to be analyzed.
+ -c --stdin Read BAM input on stdin.
+ -v --vcf FILE Output VCF-format results to FILE.
+ -f --fasta-reference FILE
+ Use FILE as the reference sequence for analysis.
+ An index file (FILE.fai) will be created if none exists.
+ If neither --targets nor --region are specified, FreeBayes
+ will analyze every position in this reference.
+ -t --targets FILE
+ Limit analysis to targets listed in the BED-format FILE.
+ -r --region <chrom>:<start_position>..<end_position>
+ Limit analysis to the specified region, 0-base coordinates,
+ end_position not included (same as BED format).
+ -s --samples FILE
+ Limit analysis to samples listed (one per line) in the FILE.
+ By default FreeBayes will analyze all samples in its input
+ BAM files.
+ --populations FILE
+ Each line of FILE should list a sample and a population which
+ it is part of. The population-based bayesian inference model
+ will then be partitioned on the basis of the populations.
+ -A --cnv-map FILE
+ Read a copy number map from the BED file FILE, which has
+ the format:
+ reference sequence, start, end, sample name, copy number
+ ... for each region in each sample which does not have the
+ default copy number as set by --ploidy.
+ -L --trace FILE Output an algorithmic trace to FILE.
+ --failed-alleles FILE
+ Write a BED file of the analyzed positions which do not
+ pass --pvar to FILE.
+ -@ --variant-input VCF
+ Use variants reported in VCF file as input to the algorithm.
+ A report will be generated for every record in the VCF file.
+ -l --only-use-input-alleles
+ Only provide variant calls and genotype likelihoods for sites
+ and alleles which are provided in the VCF input, and provide
+ output in the VCF for all input alleles, not just those which
+ have support in the data.
+ --haplotype-basis-alleles VCF
+ When specified, only variant alleles provided in this input
+ VCF will be used for the construction of complex or haplotype
+ alleles.
+
+ reporting:
+
+ -P --pvar N Report sites if the probability that there is a polymorphism
+ at the site is greater than N. default: 0.0001
+ -_ --show-reference-repeats
+ Calculate and show information about reference repeats in
+ the VCF output.
+
+ population model:
+
+ -T --theta N The expected mutation rate or pairwise nucleotide diversity
+ among the population under analysis. This serves as the
+ single parameter to the Ewens Sampling Formula prior model
+ default: 0.001
+ -p --ploidy N Sets the default ploidy for the analysis to N. default: 2
+ -J --pooled Assume that samples result from pooled sequencing.
+ When using this flag, set --ploidy to the number of
+ alleles in each sample.
+
+ reference allele:
+
+ -Z --use-reference-allele
+ This flag includes the reference allele in the analysis as
+ if it is another sample from the same population.
+ -H --diploid-reference
+ If using the reference sequence as a sample (-Z),
+ treat it as diploid. default: false (reference is haploid)
+ --reference-quality MQ,BQ
+ Assign mapping quality of MQ to the reference allele at each
+ site and base quality of BQ. default: 100,60
+
+ allele scope:
+
+ -I --no-snps Ignore SNP alleles.
+ -i --no-indels Ignore insertion and deletion alleles.
+ -X --no-mnps Ignore multi-nucleotide polymorphisms, MNPs.
+ -u --no-complex Ignore complex events (composites of other classes).
+ -n --use-best-n-alleles N
+ Evaluate only the best N SNP alleles, ranked by sum of
+ supporting quality scores. (Set to 0 to use all; default: all)
+ -E --max-complex-gap N
+ Allow complex alleles with contiguous embedded matches of up
+ to this length.
+
+ indel realignment:
+
+ -O --left-align-indels
+ Left-realign and merge gaps embedded in reads. default: false
+
+ input filters:
+
+ -4 --use-duplicate-reads
+ Include duplicate-marked alignments in the analysis.
+ default: exclude duplicates
+ -m --min-mapping-quality Q
+ Exclude alignments from analysis if they have a mapping
+ quality less than Q. default: 30
+ -q --min-base-quality Q
+ Exclude alleles from analysis if their supporting base
+ quality is less than Q. default: 20
+ -R --min-supporting-quality MQ,BQ
+ In order to consider an alternate allele, at least one supporting
+ alignment must have mapping quality MQ, and one supporting
+ allele must have base quality BQ. default: 0,0, unset
+ -Q --mismatch-base-quality-threshold Q
+ Count mismatches toward --read-mismatch-limit if the base
+ quality of the mismatch is >= Q. default: 10
+ -U --read-mismatch-limit N
+ Exclude reads with more than N mismatches where each mismatch
+ has base quality >= mismatch-base-quality-threshold.
+ default: ~unbounded
+ -z --read-max-mismatch-fraction N
+ Exclude reads with more than N [0,1] fraction of mismatches where
+ each mismatch has base quality >= mismatch-base-quality-threshold
+ default: 1.0
+ -$ --read-snp-limit N
+ Exclude reads with more than N base mismatches, ignoring gaps
+ with quality >= mismatch-base-quality-threshold.
+ default: ~unbounded
+ -e --read-indel-limit N
+ Exclude reads with more than N separate gaps.
+ default: ~unbounded
+ -0 --standard-filters Use stringent input base and mapping quality filters
+ Equivalent to -m 30 -q 20 -R 0 -S 0
+ -x --indel-exclusion-window
+ Ignore portions of alignments this many bases from a
+ putative insertion or deletion allele. default: 0
+ -F --min-alternate-fraction N
+ Require at least this fraction of observations supporting
+ an alternate allele within a single individual
+ in order to evaluate the position. default: 0.0
+ -C --min-alternate-count N
+ Require at least this count of observations supporting
+ an alternate allele within a single individual in order
+ to evaluate the position. default: 1
+ -3 --min-alternate-qsum N
+ Require at least this sum of quality of observations supporting
+ an alternate allele within a single individual in order
+ to evaluate the position. default: 0
+ -G --min-alternate-total N
+ Require at least this count of observations supporting
+ an alternate allele within the total population in order
+ to use the allele in analysis. default: 1
+ -! --min-coverage N
+ Require at least this coverage to process a site. default: 0
+
+ bayesian priors:
+
+ -Y --no-ewens-priors
+ Turns off the Ewens' Sampling Formula component of the priors.
+ -k --no-population-priors
+ Equivalent to --pooled --no-ewens-priors
+ -w --hwe-priors Use the probability of the combination arising under HWE given
+ the allele frequency as estimated by observation frequency.
+
+ observation prior expectations:
+
+ -V --binomial-obs-priors
+ Incorporate expectations about observations into the priors.
+ Uses read placement probability, strand balance probability,
+ and read position (5'-3') probability.
+ -a --allele-balance-priors
+ Use aggregate probability of observation balance between alleles
+ as a component of the priors. Best for observations with minimal
+ inherent reference bias.
+
+ algorithmic features:
+
+ -M --site-selection-max-iterations N
+ Uses hill-climbing algorithm to search posterior space for N
+ iterations to determine if the site should be evaluated. Set to 0
+ to prevent use of this algorithm for site selection, and
+ to a low integer for improved site selection at a slight
+ performance penalty. default: 5.
+ -B --genotyping-max-iterations N
+ Iterate no more than N times during genotyping step. default: 25.
+ --genotyping-max-banddepth N
+ Integrate no deeper than the Nth best genotype by likelihood when
+ genotyping. default: 6.
+ -W --posterior-integration-limits N,M
+ Integrate all genotype combinations in our posterior space
+ which include no more than N samples with their Mth best
+ data likelihood. default: 1,3.
+ -K --no-permute
+ Do not scale prior probability of genotype combination given allele
+ frequency by the number of permutations of included genotypes.
+ -N --exclude-unobserved-genotypes
+ Skip sample genotypings for which the sample has no supporting reads.
+ -S --genotype-variant-threshold N
+ Limit posterior integration to samples where the second-best
+ genotype likelihood is no more than log(N) from the highest
+ genotype likelihood for the sample. default: ~unbounded
+ -j --use-mapping-quality
+ Use mapping quality of alleles when calculating data likelihoods.
+ -D --read-dependence-factor N
+ Incorporate non-independence of reads by scaling successive
+ observations by this factor during data likelihood
+ calculations. default: 0.9
+ -= --no-marginals
+ Do not calculate the marginal probability of genotypes. Saves
+ time and improves scaling performance in large populations.
+
+
+------
+
+**Citation**
+
+For the underlying tool, please cite `Erik Garrison and Gabor Marth. Haplotype-based variant detection from short-read sequencing <http://arxiv.org/abs/1207.3907>`_.
+
+If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.*
+
+
+
diff -r 000000000000 -r e21073b0dc1f sam_fa_indices.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sam_fa_indices.loc.sample Wed Dec 11 15:41:19 2013 -0500
@@ -0,0 +1,28 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools indexed sequences data files. You will need
+#to create these data files and then create a sam_fa_indices.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The sam_fa_indices.loc
+#file has this format (white space characters are TAB characters):
+#
+#index <seq> <location>
+#
+#So, for example, if you had hg18 indexed stored in
+#/depot/data2/galaxy/sam/,
+#then the sam_fa_indices.loc entry would look like this:
+#
+#index hg18 /depot/data2/galaxy/sam/hg18.fa
+#
+#and your /depot/data2/galaxy/sam/ directory
+#would contain hg18.fa and hg18.fa.fai files:
+#
+#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.fa
+#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.fa.fai
+#
+#Your sam_fa_indices.loc file should include an entry per line for
+#each index set you have stored. The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file. For example:
+#
+#index hg18 /depot/data2/galaxy/sam/hg18.fa
+#index hg19 /depot/data2/galaxy/sam/hg19.fa
diff -r 000000000000 -r e21073b0dc1f test-data/fake_phiX_reads_1.bam
Binary file test-data/fake_phiX_reads_1.bam has changed
diff -r 000000000000 -r e21073b0dc1f test-data/freebayes_out_1.output_trace
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/freebayes_out_1.output_trace Wed Dec 11 15:41:19 2013 -0500
@@ -0,0 +1,8 @@
+phiX174,1411,allele,phiX174,phiX174,A,60,100
+phiX174,1412,allele,phiX174,phiX174,G,60,100
+phiX174,1413,allele,phiX174,phiX174,C,60,100
+phiX174,1414,allele,phiX174,phiX174,G,60,100
+phiX174,1415,allele,phiX174,phiX174,C,60,100
+phiX174,1416,allele,phiX174,phiX174,C,60,100
+phiX174,1417,allele,phiX174,phiX174,G,60,100
+phiX174,1418,allele,phiX174,phiX174,T,60,100
diff -r 000000000000 -r e21073b0dc1f test-data/freebayes_out_1.vcf.contains
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/freebayes_out_1.vcf.contains Wed Dec 11 15:41:19 2013 -0500
@@ -0,0 +1,2 @@
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+
diff -r 000000000000 -r e21073b0dc1f test-data/phiX.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phiX.fasta Wed Dec 11 15:41:19 2013 -0500
@@ -0,0 +1,79 @@
+>phiX174
+GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT
+GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA
+ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG
+TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA
+GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC
+TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT
+TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT
+CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT
+TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG
+TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC
+GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA
+CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG
+TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT
+AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC
+CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA
+TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC
+TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA
+CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA
+GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT
+GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA
+ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC
+TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT
+TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC
+ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC
+CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT
+GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC
+CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC
+TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG
+TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT
+TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA
+AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT
+TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT
+ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC
+GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC
+TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT
+TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA
+TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG
+TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC
+CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG
+AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC
+CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT
+TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG
+CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA
+AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT
+GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG
+GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA
+TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT
+CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG
+TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA
+GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC
+CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA
+TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA
+AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC
+TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT
+CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA
+TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG
+TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT
+CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT
+TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC
+ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG
+TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA
+ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG
+GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC
+CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT
+GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG
+GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT
+ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG
+CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC
+CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC
+GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT
+CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG
+CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA
+TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT
+TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG
+TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC
+AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC
+TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA
+
diff -r 000000000000 -r e21073b0dc1f tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Dec 11 15:41:19 2013 -0500
@@ -0,0 +1,8 @@
+
+
+
+
+ line_type, value, path
+
+
+
diff -r 000000000000 -r e21073b0dc1f tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed Dec 11 15:41:19 2013 -0500
@@ -0,0 +1,47 @@
+
+
+
+
+
+ git clone --recursive https://github.com/ekg/freebayes.git
+ git checkout 9608597d12e127c847ae03aa03440ab63992fedf
+ git submodule update --recursive
+ make || ( make clean && sed -i.bak -e 's:LIBS = -lz -lm -L./ -L../vcflib/tabixpp/ -L$(BAMTOOLS_ROOT)/lib -ltabix:LIBS = -lm -L./ -L../vcflib/tabixpp/ -L$(BAMTOOLS_ROOT)/lib -ltabix -lz:g' src/Makefile && make )
+
+ bin
+ $INSTALL_DIR/bin
+
+
+ $INSTALL_DIR/bin
+
+
+
+
+FreeBayes requires g++ and the standard C and C++ development libraries.
+Additionally, cmake is required for building the BamTools API.
+
+
+
+
+
+ http://sourceforge.net/projects/samtools/files/samtools/0.1.18/samtools-0.1.18.tar.bz2
+ sed -i.bak -e 's/-lcurses/-lncurses/g' Makefile
+ make
+
+ samtools
+ $INSTALL_DIR/bin
+
+
+ misc/maq2sam-long
+ $INSTALL_DIR/bin
+
+
+ $INSTALL_DIR/bin
+
+
+
+
+Compiling SAMtools requires the ncurses and zlib development libraries.
+
+
+