Mercurial > repos > dfornika > snippy
changeset 21:0e733df972b5 draft
planemo upload commit d65fe6718a4e9a9fa6dba28e6702335222c3e221-dirty
author | dfornika |
---|---|
date | Tue, 12 Mar 2019 17:37:29 -0400 |
parents | 3bbfe41787af |
children | 0bf16c8aca73 |
files | snippy.xml test-data/a/ref.fa test-data/a/ref.fa.fai test-data/a/reference/genomes/ref.fa test-data/a/reference/ref.fa test-data/a/reference/ref.fa.amb test-data/a/reference/ref.fa.ann test-data/a/reference/ref.fa.bwt test-data/a/reference/ref.fa.fai test-data/a/reference/ref.fa.pac test-data/a/reference/ref.fa.sa test-data/a/reference/ref.gff test-data/a/reference/ref.txt test-data/a/reference/ref/genes.gff.gz test-data/a/snps.aligned.fa test-data/a/snps.bam test-data/a/snps.bam.bai test-data/a/snps.bed test-data/a/snps.consensus.fa test-data/a/snps.consensus.subs.fa test-data/a/snps.csv test-data/a/snps.filt.vcf test-data/a/snps.gff test-data/a/snps.html test-data/a/snps.log test-data/a/snps.raw.vcf test-data/a/snps.subs.vcf test-data/a/snps.tab test-data/a/snps.txt test-data/a/snps.vcf test-data/a/snps.vcf.gz test-data/a/snps.vcf.gz.csi test-data/b/ref.fa test-data/b/ref.fa.fai test-data/b/reference/genomes/ref.fa test-data/b/reference/ref.fa test-data/b/reference/ref.fa.amb test-data/b/reference/ref.fa.ann test-data/b/reference/ref.fa.bwt test-data/b/reference/ref.fa.fai test-data/b/reference/ref.fa.pac test-data/b/reference/ref.fa.sa test-data/b/reference/ref.gff test-data/b/reference/ref.txt test-data/b/reference/ref/genes.gff.gz test-data/b/snps.aligned.fa test-data/b/snps.bam test-data/b/snps.bam.bai test-data/b/snps.bed test-data/b/snps.consensus.fa test-data/b/snps.consensus.subs.fa test-data/b/snps.csv test-data/b/snps.filt.vcf test-data/b/snps.gff test-data/b/snps.html test-data/b/snps.log test-data/b/snps.raw.vcf test-data/b/snps.subs.vcf test-data/b/snps.tab test-data/b/snps.txt test-data/b/snps.vcf test-data/b/snps.vcf.gz test-data/b/snps.vcf.gz.csi test-data/b_gbk/ref.fa test-data/b_gbk/ref.fa.fai test-data/b_gbk/reference/genomes/ref.fa test-data/b_gbk/reference/ref.fa test-data/b_gbk/reference/ref.fa.amb test-data/b_gbk/reference/ref.fa.ann test-data/b_gbk/reference/ref.fa.bwt test-data/b_gbk/reference/ref.fa.fai test-data/b_gbk/reference/ref.fa.pac test-data/b_gbk/reference/ref.fa.sa test-data/b_gbk/reference/ref.gff test-data/b_gbk/reference/ref.txt test-data/b_gbk/reference/ref/genes.gff.gz test-data/b_gbk/reference/ref/snpEffectPredictor.bin test-data/b_gbk/reference/snpeff.config test-data/b_gbk/snps.bam test-data/b_gbk/snps.bam.bai test-data/b_gbk/snps.filt.vcf test-data/b_gbk/snps.log test-data/b_gbk/snps.raw.vcf test-data/b_gbk/snps.vcf test-data/b_out_dev/ref.fa test-data/b_out_dev/ref.fa.fai test-data/b_out_dev/reference/genomes/ref.fa test-data/b_out_dev/reference/ref.fa test-data/b_out_dev/reference/ref.fa.amb test-data/b_out_dev/reference/ref.fa.ann test-data/b_out_dev/reference/ref.fa.bwt test-data/b_out_dev/reference/ref.fa.fai test-data/b_out_dev/reference/ref.fa.pac test-data/b_out_dev/reference/ref.fa.sa test-data/b_out_dev/reference/ref.gff test-data/b_out_dev/reference/ref.txt test-data/b_out_dev/reference/ref/genes.gff.gz test-data/b_out_dev/snps.aligned.fa test-data/b_out_dev/snps.bam test-data/b_out_dev/snps.bam.bai test-data/b_out_dev/snps.bed test-data/b_out_dev/snps.consensus.fa test-data/b_out_dev/snps.consensus.subs.fa test-data/b_out_dev/snps.csv test-data/b_out_dev/snps.filt.vcf test-data/b_out_dev/snps.gff test-data/b_out_dev/snps.html test-data/b_out_dev/snps.log test-data/b_out_dev/snps.raw.vcf test-data/b_out_dev/snps.subs.vcf test-data/b_out_dev/snps.tab test-data/b_out_dev/snps.txt test-data/b_out_dev/snps.vcf test-data/b_out_dev/snps.vcf.gz test-data/b_out_dev/snps.vcf.gz.csi test-data/c/ref.fa test-data/c/ref.fa.fai test-data/c/reference/genomes/ref.fa test-data/c/reference/ref.fa test-data/c/reference/ref.fa.amb test-data/c/reference/ref.fa.ann test-data/c/reference/ref.fa.bwt test-data/c/reference/ref.fa.fai test-data/c/reference/ref.fa.pac test-data/c/reference/ref.fa.sa test-data/c/reference/ref.gff test-data/c/reference/ref.txt test-data/c/reference/ref/genes.gff.gz test-data/c/snps.aligned.fa test-data/c/snps.bam test-data/c/snps.bam.bai test-data/c/snps.bed test-data/c/snps.consensus.fa test-data/c/snps.consensus.subs.fa test-data/c/snps.csv test-data/c/snps.filt.vcf test-data/c/snps.gff test-data/c/snps.html test-data/c/snps.log test-data/c/snps.raw.vcf test-data/c/snps.subs.vcf test-data/c/snps.tab test-data/c/snps.txt test-data/c/snps.vcf test-data/c/snps.vcf.gz test-data/c/snps.vcf.gz.csi test-data/c_gbk/ref.fa test-data/c_gbk/ref.fa.fai test-data/c_gbk/reference/genomes/ref.fa test-data/c_gbk/reference/ref.fa test-data/c_gbk/reference/ref.fa.amb test-data/c_gbk/reference/ref.fa.ann test-data/c_gbk/reference/ref.fa.bwt test-data/c_gbk/reference/ref.fa.fai test-data/c_gbk/reference/ref.fa.pac test-data/c_gbk/reference/ref.fa.sa test-data/c_gbk/reference/ref.gff test-data/c_gbk/reference/ref.txt test-data/c_gbk/reference/ref/genes.gff.gz test-data/c_gbk/reference/ref/snpEffectPredictor.bin test-data/c_gbk/reference/snpeff.config test-data/c_gbk/snps.bam test-data/c_gbk/snps.bam.bai test-data/c_gbk/snps.filt.vcf test-data/c_gbk/snps.log test-data/c_gbk/snps.raw.vcf test-data/c_gbk/snps.vcf test-data/fna_ref_b_testing/ref.fa test-data/fna_ref_b_testing/ref.fa.fai test-data/fna_ref_b_testing/reference/genomes/ref.fa test-data/fna_ref_b_testing/reference/ref.fa test-data/fna_ref_b_testing/reference/ref.fa.amb test-data/fna_ref_b_testing/reference/ref.fa.ann test-data/fna_ref_b_testing/reference/ref.fa.bwt test-data/fna_ref_b_testing/reference/ref.fa.fai test-data/fna_ref_b_testing/reference/ref.fa.pac test-data/fna_ref_b_testing/reference/ref.fa.sa test-data/fna_ref_b_testing/reference/ref.gff test-data/fna_ref_b_testing/reference/ref.txt test-data/fna_ref_b_testing/reference/ref/genes.gff.gz test-data/fna_ref_b_testing/snps.aligned.fa test-data/fna_ref_b_testing/snps.bam test-data/fna_ref_b_testing/snps.bam.bai test-data/fna_ref_b_testing/snps.bed test-data/fna_ref_b_testing/snps.consensus.fa test-data/fna_ref_b_testing/snps.consensus.subs.fa test-data/fna_ref_b_testing/snps.csv test-data/fna_ref_b_testing/snps.filt.vcf test-data/fna_ref_b_testing/snps.gff test-data/fna_ref_b_testing/snps.html test-data/fna_ref_b_testing/snps.log test-data/fna_ref_b_testing/snps.raw.vcf test-data/fna_ref_b_testing/snps.subs.vcf test-data/fna_ref_b_testing/snps.tab test-data/fna_ref_b_testing/snps.txt test-data/fna_ref_b_testing/snps.vcf test-data/fna_ref_b_testing/snps.vcf.gz test-data/fna_ref_b_testing/snps.vcf.gz.csi test-data/gbk_ref_b/snps.vcf test-data/prokka-out/PROKKA_02062019.err test-data/prokka-out/PROKKA_02062019.faa test-data/prokka-out/PROKKA_02062019.ffn test-data/prokka-out/PROKKA_02062019.fna test-data/prokka-out/PROKKA_02062019.fsa test-data/prokka-out/PROKKA_02062019.gbk test-data/prokka-out/PROKKA_02062019.gff test-data/prokka-out/PROKKA_02062019.log test-data/prokka-out/PROKKA_02062019.sqn test-data/prokka-out/PROKKA_02062019.tbl test-data/prokka-out/PROKKA_02062019.tsv test-data/prokka-out/PROKKA_02062019.txt test-data/ref.fna test-data/reference.fasta.fai test-data/reference.gbk test-data/snippy-core-out/core.aln test-data/snippy-core-out/core.full.aln test-data/snippy-core-out/core.ref.fa test-data/snippy-core-out/core.tab test-data/snippy-core-out/core.txt test-data/snippy-core-out/core.vcf |
diffstat | 211 files changed, 547 insertions(+), 3833 deletions(-) [+] |
line wrap: on
line diff
--- a/snippy.xml Fri Mar 08 20:46:56 2019 -0500 +++ b/snippy.xml Tue Mar 12 17:37:29 2019 -0400 @@ -30,7 +30,7 @@ --minfrac $adv.minfrac --minqual $adv.minqual #if $adv.rgid - --rgid '$advanced.rgid' + --rgid '$adv.rgid' #end if #if $adv.bwaopt --bwaopt '$advanced.bwaopt' @@ -56,12 +56,17 @@ #import re #if str( $fastq_input.fastq_input_selector ) == "paired" #set $dir_name = re.sub('[^\w_]', '_', $fastq_input.fastq_input1.element_identifier) + #elif str( $fastq_input.fastq_input_selector ) == "paired_collection" + #set $dir_name = re.sub('[^\w_]', '_', $fastq_input.fastq_input.name) #else #set $dir_name = re.sub('[^\w_]', '_', $fastq_input.fastq_input.element_identifier) #end if mkdir -p ${dir_name} && cp -r out/reference out/snps.tab out/snps.aligned.fa out/snps.vcf ${dir_name}/ && tar -czf out.tgz ${dir_name} + #if "outcon" in str($outputs) and $adv.rename_cons + && sed -i 's/>.*/>${dir_name}/' out/snps.consensus.fa + #end if ]]></command> @@ -99,6 +104,7 @@ <param name="minqual" type="float" value="100.0" label="Minumum QUALITY in VCF column 6" help="Minumum QUALITY in VCF column 6" /> <param name="rgid" type="text" value="" label="Bam header @RG ID" help="Use this @RG ID: in the BAM header" /> <param name="bwaopt" type="text" value="" label="Extra BWA MEM options" help="Extra BWA MEM options, eg. -x pacbio" /> + <param name="rename_cons" type="boolean" truevalue="rename_cons" falsevalue="" help="When producing an output of the reference genome with variants instantiated, edit the header so that it is named after the input VCF" /> </section> <param name="outputs" type="select" multiple="true" display="checkboxes" label="Output selection">
--- a/test-data/a/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/a/ref.fa.fai Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference 700 11 60 61
--- a/test-data/a/reference/genomes/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/a/reference/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/a/reference/ref.fa.amb Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -700 1 0
--- a/test-data/a/reference/ref.fa.ann Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -700 1 11 -0 reference (null) -0 700 0
--- a/test-data/a/reference/ref.fa.fai Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference 700 11 60 61
--- a/test-data/a/reference/ref.txt Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference:0-700
--- a/test-data/a/snps.aligned.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACT--------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -----------------------------------------
--- a/test-data/a/snps.consensus.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/a/snps.consensus.subs.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/a/snps.csv Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -CHROM,POS,TYPE,REF,ALT,EVIDENCE,FTYPE,STRAND,NT_POS,AA_POS,EFFECT,LOCUS_TAG,GENE,PRODUCT
--- a/test-data/a/snps.filt.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20190206 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> -##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> -##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##bcftools_viewVersion=1.9+htslib-1.9 -##bcftools_viewCommand=view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf; Date=Wed Feb 6 11:34:57 2019 -##bcftools_annotateVersion=1.9+htslib-1.9 -##bcftools_annotateCommand=annotate --remove ^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL; Date=Wed Feb 6 11:34:57 2019 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT a
--- a/test-data/a/snps.gff Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -##gff-version 3
--- a/test-data/a/snps.html Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,17 +0,0 @@ -<TABLE ID='snps' BORDER=1> -<TR> -<TH>CHROM -<TH>POS -<TH>TYPE -<TH>REF -<TH>ALT -<TH>EVIDENCE -<TH>FTYPE -<TH>STRAND -<TH>NT_POS -<TH>AA_POS -<TH>EFFECT -<TH>LOCUS_TAG -<TH>GENE -<TH>PRODUCT -</TABLE>
--- a/test-data/a/snps.log Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,348 +0,0 @@ -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --outdir a --ref reference.fasta --R1 a_1.fastq --R2 a_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.068 sec; CPU: 0.004 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:a\tSM:a' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/a_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/a_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.13194. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.13194. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.13194. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 10 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 10 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=100 && FMT/DP>=10 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 0 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi - -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --force --outdir a --ref reference.fasta --R1 a_1.fastq --R2 a_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.071 sec; CPU: 0.003 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:a\tSM:a' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/a_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/a_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.14292. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.14292. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.14292. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=100 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 0 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi - -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --minqual 20 --force --outdir a --ref reference.fasta --R1 a_1.fastq --R2 a_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.071 sec; CPU: 0.003 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:a\tSM:a' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/a_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/a_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.15235. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.15235. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.15235. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=20 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 0 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi - -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --minqual 60 --force --outdir a --ref reference.fasta --R1 a_1.fastq --R2 a_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.071 sec; CPU: 0.003 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:a\tSM:a' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/a_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/a_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.16998. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.16998. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.16998. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 0 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi -
--- a/test-data/a/snps.raw.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,61 +0,0 @@ -##fileformat=VCFv4.2 -##fileDate=20190206 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype"> -##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> -##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> -##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally"> -##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally"> -##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> -##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> -##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations"> -##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations"> -##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand"> -##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand"> -##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand"> -##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand"> -##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> -##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome"> -##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele"> -##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele"> -##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without."> -##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best."> -##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout."> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."> -##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position."> -##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."> -##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length"> -##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles"> -##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles"> -##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments"> -##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments"> -##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> -##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT a
--- a/test-data/a/snps.subs.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -##fileformat=VCFv4.2 -##snippy="snippy-vcf_extract_subs snps.filt.vcf" -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20190206 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##bcftools_viewVersion=1.9+htslib-1.9 -##bcftools_viewCommand=view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf; Date=Wed Feb 6 11:34:57 2019 -##bcftools_annotateVersion=1.9+htslib-1.9 -##bcftools_annotateCommand=annotate --remove ^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL; Date=Wed Feb 6 11:34:57 2019 -##INFO=<ID=OLDVAR,Number=R,Type=String,Description="Original REF,ALT before decomposition"> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT a
--- a/test-data/a/snps.tab Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -CHROM POS TYPE REF ALT EVIDENCE FTYPE STRAND NT_POS AA_POS EFFECT LOCUS_TAG GENE PRODUCT
--- a/test-data/a/snps.txt Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -DateTime 2019-02-06T11:34:56 -ReadFiles /home/dfornika/Code/tools-iuc/tools/snippy/test-data/a_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/a_2.fastq -Reference /home/dfornika/Code/tools-iuc/tools/snippy/test-data/reference.fasta -ReferenceSize 700 -Software snippy 4.3.6 -VariantTotal 0
--- a/test-data/a/snps.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20190206 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> -##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> -##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##bcftools_viewVersion=1.9+htslib-1.9 -##bcftools_viewCommand=view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf; Date=Wed Feb 6 11:34:57 2019 -##bcftools_annotateVersion=1.9+htslib-1.9 -##bcftools_annotateCommand=annotate --remove ^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL; Date=Wed Feb 6 11:34:57 2019 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT a
--- a/test-data/b/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/b/ref.fa.fai Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference 700 11 60 61
--- a/test-data/b/reference/genomes/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/b/reference/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/b/reference/ref.fa.amb Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -700 1 0
--- a/test-data/b/reference/ref.fa.ann Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -700 1 11 -0 reference (null) -0 700 0
--- a/test-data/b/reference/ref.fa.fai Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference 700 11 60 61
--- a/test-data/b/reference/ref.txt Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference:0-700
--- a/test-data/b/snps.aligned.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCTCAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACT--------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -----------------------------------------
--- a/test-data/b/snps.bed Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference 3 4
--- a/test-data/b/snps.consensus.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCTCAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/b/snps.consensus.subs.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCTCAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/b/snps.csv Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -CHROM,POS,TYPE,REF,ALT,EVIDENCE,FTYPE,STRAND,NT_POS,AA_POS,EFFECT,LOCUS_TAG,GENE,PRODUCT -reference,4,snp,A,T,T:2 A:0
--- a/test-data/b/snps.filt.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20190206 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> -##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> -##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##bcftools_viewVersion=1.9+htslib-1.9 -##bcftools_viewCommand=view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf; Date=Wed Feb 6 12:11:01 2019 -##bcftools_annotateVersion=1.9+htslib-1.9 -##bcftools_annotateCommand=annotate --remove ^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL; Date=Wed Feb 6 12:11:01 2019 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT b -reference 4 . A T 63.8794 . AB=0;AO=2;DP=2;QA=80;QR=0;RO=0;TYPE=snp GT:DP:RO:QR:AO:QA:GL 1/1:2:0:0:2:80:-7.59179,-0.60206,0
--- a/test-data/b/snps.gff Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -##gff-version 3 -reference snippy:4.3.6 variation 4 4 . . 0 note=snp A=>T T:2 A:0
--- a/test-data/b/snps.html Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,24 +0,0 @@ -<TABLE ID='snps' BORDER=1> -<TR> -<TH>CHROM -<TH>POS -<TH>TYPE -<TH>REF -<TH>ALT -<TH>EVIDENCE -<TH>FTYPE -<TH>STRAND -<TH>NT_POS -<TH>AA_POS -<TH>EFFECT -<TH>LOCUS_TAG -<TH>GENE -<TH>PRODUCT -<TR> -<TD>reference -<TD>4 -<TD>snp -<TD>A -<TD>T -<TD>T:2 A:0 -</TABLE>
--- a/test-data/b/snps.log Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,435 +0,0 @@ -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --outdir b --ref reference.fasta --R1 b_1.fastq --R2 b_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.070 sec; CPU: 0.004 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:b\tSM:b' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.13430. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.13430. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.13430. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 10 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 10 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=100 && FMT/DP>=10 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 0 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi - -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --force --outdir b --ref reference.fasta --R1 b_1.fastq --R2 b_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.070 sec; CPU: 0.003 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:b\tSM:b' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.14512. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.14512. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.14512. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=100 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 0 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi - -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --minqual 20 --force --outdir b --ref reference.fasta --R1 b_1.fastq --R2 b_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.071 sec; CPU: 0.003 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:b\tSM:b' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.15458. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.15458. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.15458. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=20 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 1 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi - -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --minqual 60 --force --outdir b --ref reference.fasta --R1 b_1.fastq --R2 b_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.070 sec; CPU: 0.004 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:b\tSM:b' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.17221. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.17221. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.17221. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 1 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi - -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --minqual 60 --force --outdir b --ref reference.fasta --R1 b_1.fastq --R2 b_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.071 sec; CPU: 0.003 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:b\tSM:b' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.23600. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.23600. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.23600. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 1 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi -
--- a/test-data/b/snps.raw.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,62 +0,0 @@ -##fileformat=VCFv4.2 -##fileDate=20190206 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype"> -##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> -##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> -##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally"> -##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally"> -##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> -##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> -##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations"> -##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations"> -##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand"> -##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand"> -##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand"> -##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand"> -##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> -##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome"> -##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele"> -##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele"> -##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without."> -##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best."> -##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout."> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."> -##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position."> -##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."> -##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length"> -##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles"> -##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles"> -##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments"> -##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments"> -##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> -##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT b -reference 4 . A T 63.8794 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=7.37776;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=80;QR=0;RO=0;RPL=0;RPP=7.35324;RPPR=0;RPR=2;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 1/1:2:0,2:0:0:2:80:-7.59179,-0.60206,0
--- a/test-data/b/snps.subs.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -##fileformat=VCFv4.2 -##snippy="snippy-vcf_extract_subs snps.filt.vcf" -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20190206 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##bcftools_viewVersion=1.9+htslib-1.9 -##bcftools_viewCommand=view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf; Date=Wed Feb 6 12:11:01 2019 -##bcftools_annotateVersion=1.9+htslib-1.9 -##bcftools_annotateCommand=annotate --remove ^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL; Date=Wed Feb 6 12:11:01 2019 -##INFO=<ID=OLDVAR,Number=R,Type=String,Description="Original REF,ALT before decomposition"> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT b -reference 4 . A T 63.8794 . TYPE=snp;DP=2;RO=0;AO=2 GT:DP:RO:AO:QR:QA 1/1:2:0:2:0:80
--- a/test-data/b/snps.tab Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -CHROM POS TYPE REF ALT EVIDENCE FTYPE STRAND NT_POS AA_POS EFFECT LOCUS_TAG GENE PRODUCT -reference 4 snp A T T:2 A:0
--- a/test-data/b/snps.txt Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -DateTime 2019-02-06T12:11:00 -ReadFiles /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_2.fastq -Reference /home/dfornika/Code/tools-iuc/tools/snippy/test-data/reference.fasta -ReferenceSize 700 -Software snippy 4.3.6 -Variant-SNP 1 -VariantTotal 1
--- a/test-data/b/snps.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20190206 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> -##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> -##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##bcftools_viewVersion=1.9+htslib-1.9 -##bcftools_viewCommand=view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf; Date=Wed Feb 6 12:11:01 2019 -##bcftools_annotateVersion=1.9+htslib-1.9 -##bcftools_annotateCommand=annotate --remove ^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL; Date=Wed Feb 6 12:11:01 2019 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT b -reference 4 . A T 63.8794 . AB=0;AO=2;DP=2;QA=80;QR=0;RO=0;TYPE=snp GT:DP:RO:QR:AO:QA:GL 1/1:2:0:0:2:80:-7.59179,-0.60206,0
--- a/test-data/b_gbk/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference Listeria monocytogenes strain strain. -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/b_gbk/ref.fa.fai Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference 700 49 60 61
--- a/test-data/b_gbk/reference/genomes/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference Listeria monocytogenes strain strain. -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/b_gbk/reference/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference Listeria monocytogenes strain strain. -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/b_gbk/reference/ref.fa.amb Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -700 1 0
--- a/test-data/b_gbk/reference/ref.fa.ann Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -700 1 11 -0 reference Listeria monocytogenes strain strain. -0 700 0
--- a/test-data/b_gbk/reference/ref.fa.fai Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference 700 49 60 61
--- a/test-data/b_gbk/reference/ref.gff Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -##gff-version 3 -reference snippy CDS 188 658 . + 0 ID=KPFNHDNB_00001;codon_start=1;inference=ab initio prediction:Prodigal:2.6;locus_tag=KPFNHDNB_00001;product=hypothetical protein;transl_table=11;translation=MIFQRLLKTRDTEFYRVIQNRNIDDVFGYLLIHDKREPAEIDDFKVFAKSNINKEAFSVNIKKNHIYTMFFHFTDLEEEQEIPKFTKVIRFIEGLLSFQPETSHYVDNYLIKEKLIFEYPAEFEKIGEFAKYLVKLSGRKITIPDTTREKYIYLTQ
--- a/test-data/b_gbk/reference/ref.txt Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference:0-700
--- a/test-data/b_gbk/reference/snpeff.config Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -codon.Standard : TTT/F, TTC/F, TTA/L, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Vertebrate_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/M+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/*, AGG/*, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Yeast_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/T, CTC/T, CTA/T, CTG/T, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/M+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Mold_Mitochondrial : TTT/F, TTC/F, TTA/L+, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/I+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Protozoan_Mitochondrial : TTT/F, TTC/F, TTA/L+, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/I+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Coelenterate : TTT/F, TTC/F, TTA/L+, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/I+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Mitochondrial : TTT/F, TTC/F, TTA/L+, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/I+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Mycoplasma : TTT/F, TTC/F, TTA/L+, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/I+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Spiroplasma : TTT/F, TTC/F, TTA/L+, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/I+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Invertebrate_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/M+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/S, AGG/S, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Ciliate_Nuclear : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/Q, TAG/Q, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Dasycladacean_Nuclear : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/Q, TAG/Q, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Hexamita_Nuclear : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/Q, TAG/Q, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Echinoderm_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/N, AAG/K, AGT/S, AGC/S, AGA/S, AGG/S, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Flatworm_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/N, AAG/K, AGT/S, AGC/S, AGA/S, AGG/S, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Euplotid_Nuclear : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/C, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Bacterial_and_Plant_Plastid : TTT/F, TTC/F, TTA/L, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/I+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Alternative_Yeast_Nuclear : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/S+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Ascidian_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/M+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/G, AGG/G, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Alternative_Flatworm_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/Y, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/N, AAG/K, AGT/S, AGC/S, AGA/S, AGG/S, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Blepharisma_Macronuclear : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/Q, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Chlorophycean_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/L, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Trematode_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/M, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/N, AAG/K, AGT/S, AGC/S, AGA/S, AGG/S, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Scenedesmus_obliquus_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/*, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/L, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Thraustochytrium_Mitochondrial : TTT/F, TTC/F, TTA/*, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -ref.genome : Snippy Reference - ref.chromosome : reference - ref.reference.codonTable : Bacterial_and_Plant_Plastid
--- a/test-data/b_gbk/snps.filt.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20190206 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> -##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> -##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##bcftools_viewVersion=1.9+htslib-1.9 -##bcftools_viewCommand=view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf; Date=Wed Feb 6 12:11:38 2019 -##bcftools_annotateVersion=1.9+htslib-1.9 -##bcftools_annotateCommand=annotate --remove ^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL; Date=Wed Feb 6 12:11:38 2019 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT b_gbk -reference 4 . A T 63.8794 . AB=0;AO=2;DP=2;QA=80;QR=0;RO=0;TYPE=snp GT:DP:RO:QR:AO:QA:GL 1/1:2:0:0:2:80:-7.59179,-0.60206,0
--- a/test-data/b_gbk/snps.log Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,168 +0,0 @@ -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --minqual 60 --force --outdir b_gbk --ref reference.gbk --R1 b_1.fastq --R2 b_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.070 sec; CPU: 0.003 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### snpEff build -c reference/snpeff.config -dataDir . -gff3 ref - - -### bwa mem -Y -M -R '@RG\tID:b_gbk\tSM:b_gbk' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.22845. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.22845. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.22845. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### snpEff ann -noLog -noStats -no-downstream -no-upstream -no-utr -t -c reference/snpeff.config -dataDir . ref snps.filt.vcf > snps.vcf - -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --minqual 60 --force --outdir b_gbk --ref reference.gbk --R1 b_1.fastq --R2 b_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.070 sec; CPU: 0.003 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### snpEff build -c reference/snpeff.config -dataDir . -gff3 ref - - -### bwa mem -Y -M -R '@RG\tID:b_gbk\tSM:b_gbk' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.23270. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.23270. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.23270. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### snpEff ann -noLog -noStats -no-downstream -no-upstream -no-utr -t -c reference/snpeff.config -dataDir . ref snps.filt.vcf > snps.vcf - -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --minqual 60 --force --outdir b_gbk --ref reference.gbk --R1 b_1.fastq --R2 b_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.070 sec; CPU: 0.004 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### snpEff build -c reference/snpeff.config -dataDir . -gff3 ref - - -### bwa mem -Y -M -R '@RG\tID:b_gbk\tSM:b_gbk' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.23837. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.23837. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.23837. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### snpEff ann -noLog -noStats -no-downstream -no-upstream -no-utr -t -c reference/snpeff.config -dataDir . ref snps.filt.vcf > snps.vcf -
--- a/test-data/b_gbk/snps.raw.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,62 +0,0 @@ -##fileformat=VCFv4.2 -##fileDate=20190206 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype"> -##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> -##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> -##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally"> -##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally"> -##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> -##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> -##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations"> -##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations"> -##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand"> -##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand"> -##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand"> -##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand"> -##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> -##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome"> -##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele"> -##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele"> -##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without."> -##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best."> -##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout."> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."> -##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position."> -##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."> -##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length"> -##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles"> -##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles"> -##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments"> -##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments"> -##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> -##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT b_gbk -reference 4 . A T 63.8794 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=7.37776;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=80;QR=0;RO=0;RPL=0;RPP=7.35324;RPPR=0;RPR=2;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 1/1:2:0,2:0:0:2:80:-7.59179,-0.60206,0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/ref.fa Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,13 @@ +>reference +TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA +GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT +CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT +GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT +ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC +AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC +AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA +AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT +TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA +ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG +TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT +TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/ref.fa.fai Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,1 @@ +reference 700 11 60 61
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/reference/genomes/ref.fa Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,13 @@ +>reference +TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA +GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT +CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT +GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT +ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC +AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC +AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA +AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT +TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA +ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG +TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT +TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/reference/ref.fa Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,13 @@ +>reference +TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA +GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT +CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT +GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT +ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC +AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC +AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA +AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT +TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA +ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG +TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT +TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/reference/ref.fa.amb Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,1 @@ +700 1 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/reference/ref.fa.ann Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,3 @@ +700 1 11 +0 reference (null) +0 700 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/reference/ref.fa.fai Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,1 @@ +reference 700 11 60 61
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/reference/ref.txt Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,1 @@ +reference:0-700
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/snps.aligned.fa Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,13 @@ +>reference +TCCTCAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACT--------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------------
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/snps.bed Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,1 @@ +reference 3 4
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/snps.consensus.fa Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,13 @@ +>reference +TCCTCAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA +GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT +CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT +GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT +ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC +AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC +AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA +AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT +TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA +ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG +TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT +TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/snps.consensus.subs.fa Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,13 @@ +>reference +TCCTCAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA +GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT +CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT +GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT +ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC +AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC +AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA +AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT +TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA +ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG +TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT +TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/snps.csv Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,2 @@ +CHROM,POS,TYPE,REF,ALT,EVIDENCE,FTYPE,STRAND,NT_POS,AA_POS,EFFECT,LOCUS_TAG,GENE,PRODUCT +reference,4,snp,A,T,T:10 A:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/snps.filt.vcf Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,28 @@ +##fileformat=VCFv4.2 +##FILTER=<ID=PASS,Description="All filters passed"> +##fileDate=20190312 +##source=freeBayes v1.2.0-dirty +##reference=reference/ref.fa +##contig=<ID=reference,length=700> +##phasing=none +##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> +##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> +##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> +##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> +##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> +##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> +##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> +##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> +##bcftools_viewVersion=1.9+htslib-1.9 +##bcftools_viewCommand=view --include 'FMT/GT="1/1" && QUAL>=60.0 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0.9' snps.raw.vcf; Date=Tue Mar 12 14:35:28 2019 +##bcftools_annotateVersion=1.9+htslib-1.9 +##bcftools_annotateCommand=annotate --remove ^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL; Date=Tue Mar 12 14:35:28 2019 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT b +reference 4 . A T 321.326 . AB=0;AO=10;DP=10;QA=400;QR=0;RO=0;TYPE=snp GT:DP:RO:QR:AO:QA:GL 1/1:10:0:0:10:400:-36.3607,-3.0103,0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/snps.gff Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,2 @@ +##gff-version 3 +reference snippy:4.3.6 variation 4 4 . . 0 note=snp A=>T T:10 A:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/snps.html Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,24 @@ +<TABLE ID='snps' BORDER=1> +<TR> +<TH>CHROM +<TH>POS +<TH>TYPE +<TH>REF +<TH>ALT +<TH>EVIDENCE +<TH>FTYPE +<TH>STRAND +<TH>NT_POS +<TH>AA_POS +<TH>EFFECT +<TH>LOCUS_TAG +<TH>GENE +<TH>PRODUCT +<TR> +<TD>reference +<TD>4 +<TD>snp +<TD>A +<TD>T +<TD>T:10 A:0 +</TABLE>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/snps.log Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,261 @@ +### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data + +### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --outdir b_out_dev --ref ref.fna --mapqual 60 --mincov 2 --minfrac 0.9 --minqual 60.0 --R1 b_1.fastq --R2 b_2.fastq + +### samtools faidx reference/ref.fa + + +### bwa index reference/ref.fa + +[bwa_index] Pack FASTA... 0.00 sec +[bwa_index] Construct BWT for the packed sequence... +[bwa_index] 0.00 seconds elapse. +[bwa_index] Update BWT... 0.00 sec +[bwa_index] Pack forward-only FASTA... 0.00 sec +[bwa_index] Construct SA from BWT and Occ... 0.00 sec +[main] Version: 0.7.17-r1188 +[main] CMD: bwa index reference/ref.fa +[main] Real time: 0.009 sec; CPU: 0.003 sec + +### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa + + +### ln -sf reference/ref.fa . + + +### ln -sf reference/ref.fa.fai . + + +### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz + + +### bwa mem -Y -M -R '@RG\tID:b_out_dev\tSM:b_out_dev' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.9396. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.9396. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.9396. -r -s - - > snps.bam + +READ 10 WRITTEN 10 +EXCLUDED 0 EXAMINED 10 +PAIRED 10 SINGLE 0 +DULPICATE PAIR 0 DUPLICATE SINGLE 0 +DUPLICATE TOTAL 0 + +### samtools index snps.bam + + +### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt + + +### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf + + +### bcftools view --include 'FMT/GT="1/1" && QUAL>=60.0 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0.9' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf + + +### cp snps.filt.vcf snps.vcf + + +### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab + +Loading reference: reference/ref.fa +Loaded 1 sequences. +Loading features: reference/ref.gff +Parsing variants: snps.vcf +Converted 1 SNPs to TAB format. + +### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf + + +### bcftools convert -Oz -o snps.vcf.gz snps.vcf + + +### bcftools index -f snps.vcf.gz + + +### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz + +Note: the --sample option not given, applying all records regardless of the genotype + +### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf + + +### bcftools index -f snps.subs.vcf.gz + + +### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz + +Note: the --sample option not given, applying all records regardless of the genotype + +### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi + +### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data + +### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --force --outdir b_out_dev --rgid b --ref ref.fna --mapqual 60 --mincov 2 --minfrac 0.9 --minqual 60.0 --R1 b_1.fastq --R2 b_2.fastq + +### samtools faidx reference/ref.fa + + +### bwa index reference/ref.fa + +[bwa_index] Pack FASTA... 0.00 sec +[bwa_index] Construct BWT for the packed sequence... +[bwa_index] 0.00 seconds elapse. +[bwa_index] Update BWT... 0.00 sec +[bwa_index] Pack forward-only FASTA... 0.00 sec +[bwa_index] Construct SA from BWT and Occ... 0.00 sec +[main] Version: 0.7.17-r1188 +[main] CMD: bwa index reference/ref.fa +[main] Real time: 0.008 sec; CPU: 0.002 sec + +### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa + + +### ln -sf reference/ref.fa . + + +### ln -sf reference/ref.fa.fai . + + +### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz + + +### bwa mem -Y -M -R '@RG\tID:b\tSM:b' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.10001. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.10001. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.10001. -r -s - - > snps.bam + +READ 10 WRITTEN 10 +EXCLUDED 0 EXAMINED 10 +PAIRED 10 SINGLE 0 +DULPICATE PAIR 0 DUPLICATE SINGLE 0 +DUPLICATE TOTAL 0 + +### samtools index snps.bam + + +### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt + + +### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf + + +### bcftools view --include 'FMT/GT="1/1" && QUAL>=60.0 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0.9' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf + + +### cp snps.filt.vcf snps.vcf + + +### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab + +Loading reference: reference/ref.fa +Loaded 1 sequences. +Loading features: reference/ref.gff +Parsing variants: snps.vcf +Converted 1 SNPs to TAB format. + +### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf + + +### bcftools convert -Oz -o snps.vcf.gz snps.vcf + + +### bcftools index -f snps.vcf.gz + + +### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz + +Note: the --sample option not given, applying all records regardless of the genotype + +### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf + + +### bcftools index -f snps.subs.vcf.gz + + +### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz + +Note: the --sample option not given, applying all records regardless of the genotype + +### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi + +### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data + +### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --force --outdir b_out_dev --rgid b --ref ref.fna --mapqual 60 --mincov 2 --minfrac 0.9 --minqual 60.0 --R1 b_1.fastq --R2 b_2.fastq + +### samtools faidx reference/ref.fa + + +### bwa index reference/ref.fa + +[bwa_index] Pack FASTA... 0.00 sec +[bwa_index] Construct BWT for the packed sequence... +[bwa_index] 0.00 seconds elapse. +[bwa_index] Update BWT... 0.00 sec +[bwa_index] Pack forward-only FASTA... 0.00 sec +[bwa_index] Construct SA from BWT and Occ... 0.00 sec +[main] Version: 0.7.17-r1188 +[main] CMD: bwa index reference/ref.fa +[main] Real time: 0.009 sec; CPU: 0.003 sec + +### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa + + +### ln -sf reference/ref.fa . + + +### ln -sf reference/ref.fa.fai . + + +### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz + + +### bwa mem -Y -M -R '@RG\tID:b\tSM:b' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.22109. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.22109. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.22109. -r -s - - > snps.bam + +READ 10 WRITTEN 10 +EXCLUDED 0 EXAMINED 10 +PAIRED 10 SINGLE 0 +DULPICATE PAIR 0 DUPLICATE SINGLE 0 +DUPLICATE TOTAL 0 + +### samtools index snps.bam + + +### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt + + +### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf + + +### bcftools view --include 'FMT/GT="1/1" && QUAL>=60.0 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0.9' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf + + +### cp snps.filt.vcf snps.vcf + + +### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab + +Loading reference: reference/ref.fa +Loaded 1 sequences. +Loading features: reference/ref.gff +Parsing variants: snps.vcf +Converted 1 SNPs to TAB format. + +### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf + + +### bcftools convert -Oz -o snps.vcf.gz snps.vcf + + +### bcftools index -f snps.vcf.gz + + +### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz + +Note: the --sample option not given, applying all records regardless of the genotype + +### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf + + +### bcftools index -f snps.subs.vcf.gz + + +### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz + +Note: the --sample option not given, applying all records regardless of the genotype + +### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/snps.raw.vcf Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,62 @@ +##fileformat=VCFv4.2 +##fileDate=20190312 +##source=freeBayes v1.2.0-dirty +##reference=reference/ref.fa +##contig=<ID=reference,length=700> +##phasing=none +##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" +##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> +##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> +##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> +##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> +##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally"> +##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally"> +##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> +##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> +##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations"> +##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations"> +##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand"> +##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand"> +##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand"> +##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand"> +##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> +##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome"> +##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele"> +##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele"> +##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without."> +##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best."> +##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout."> +##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> +##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."> +##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position."> +##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."> +##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length"> +##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles"> +##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles"> +##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments"> +##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments"> +##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record."> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"> +##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele"> +##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> +##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> +##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT b +reference 4 . A T 321.326 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=10;CIGAR=1X;DP=10;DPB=10;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=18.4681;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=400;QR=0;RO=0;RPL=0;RPP=24.725;RPPR=0;RPR=10;RUN=1;SAF=5;SAP=3.0103;SAR=5;SRF=0;SRP=0;SRR=0;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 1/1:10:0,10:0:0:10:400:-36.3607,-3.0103,0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/snps.subs.vcf Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,27 @@ +##fileformat=VCFv4.2 +##snippy="snippy-vcf_extract_subs snps.filt.vcf" +##fileformat=VCFv4.2 +##FILTER=<ID=PASS,Description="All filters passed"> +##fileDate=20190312 +##source=freeBayes v1.2.0-dirty +##reference=reference/ref.fa +##contig=<ID=reference,length=700> +##phasing=none +##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> +##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> +##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> +##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> +##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> +##bcftools_viewVersion=1.9+htslib-1.9 +##bcftools_viewCommand=view --include 'FMT/GT="1/1" && QUAL>=60.0 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0.9' snps.raw.vcf; Date=Tue Mar 12 14:35:28 2019 +##bcftools_annotateVersion=1.9+htslib-1.9 +##bcftools_annotateCommand=annotate --remove ^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL; Date=Tue Mar 12 14:35:28 2019 +##INFO=<ID=OLDVAR,Number=R,Type=String,Description="Original REF,ALT before decomposition"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT b +reference 4 . A T 321.326 . TYPE=snp;DP=10;RO=0;AO=10 GT:DP:RO:AO:QR:QA 1/1:10:0:10:0:400
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/snps.tab Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,2 @@ +CHROM POS TYPE REF ALT EVIDENCE FTYPE STRAND NT_POS AA_POS EFFECT LOCUS_TAG GENE PRODUCT +reference 4 snp A T T:10 A:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/snps.txt Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,7 @@ +DateTime 2019-03-12T14:35:28 +ReadFiles /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_2.fastq +Reference /home/dfornika/Code/tools-iuc/tools/snippy/test-data/ref.fna +ReferenceSize 700 +Software snippy 4.3.6 +Variant-SNP 1 +VariantTotal 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b_out_dev/snps.vcf Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,28 @@ +##fileformat=VCFv4.2 +##FILTER=<ID=PASS,Description="All filters passed"> +##fileDate=20190312 +##source=freeBayes v1.2.0-dirty +##reference=reference/ref.fa +##contig=<ID=reference,length=700> +##phasing=none +##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> +##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> +##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> +##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> +##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> +##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> +##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> +##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> +##bcftools_viewVersion=1.9+htslib-1.9 +##bcftools_viewCommand=view --include 'FMT/GT="1/1" && QUAL>=60.0 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0.9' snps.raw.vcf; Date=Tue Mar 12 14:35:28 2019 +##bcftools_annotateVersion=1.9+htslib-1.9 +##bcftools_annotateCommand=annotate --remove ^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL; Date=Tue Mar 12 14:35:28 2019 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT b +reference 4 . A T 321.326 . AB=0;AO=10;DP=10;QA=400;QR=0;RO=0;TYPE=snp GT:DP:RO:QR:AO:QA:GL 1/1:10:0:0:10:400:-36.3607,-3.0103,0
--- a/test-data/c/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/c/ref.fa.fai Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference 700 11 60 61
--- a/test-data/c/reference/genomes/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/c/reference/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/c/reference/ref.fa.amb Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -700 1 0
--- a/test-data/c/reference/ref.fa.ann Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -700 1 11 -0 reference (null) -0 700 0
--- a/test-data/c/reference/ref.fa.fai Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference 700 11 60 61
--- a/test-data/c/reference/ref.txt Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference:0-700
--- a/test-data/c/snps.aligned.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTACACT--------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -----------------------------------------
--- a/test-data/c/snps.bed Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference 47 48
--- a/test-data/c/snps.consensus.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTACACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/c/snps.consensus.subs.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTACACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/c/snps.csv Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -CHROM,POS,TYPE,REF,ALT,EVIDENCE,FTYPE,STRAND,NT_POS,AA_POS,EFFECT,LOCUS_TAG,GENE,PRODUCT -reference,48,snp,A,C,C:2 A:0
--- a/test-data/c/snps.filt.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20190206 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> -##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> -##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##bcftools_viewVersion=1.9+htslib-1.9 -##bcftools_viewCommand=view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf; Date=Wed Feb 6 12:42:19 2019 -##bcftools_annotateVersion=1.9+htslib-1.9 -##bcftools_annotateCommand=annotate --remove ^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL; Date=Wed Feb 6 12:42:19 2019 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT c -reference 48 . A C 63.8794 . AB=0;AO=2;DP=2;QA=80;QR=0;RO=0;TYPE=snp GT:DP:RO:QR:AO:QA:GL 1/1:2:0:0:2:80:-7.59179,-0.60206,0
--- a/test-data/c/snps.gff Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -##gff-version 3 -reference snippy:4.3.6 variation 48 48 . . 0 note=snp A=>C C:2 A:0
--- a/test-data/c/snps.html Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,24 +0,0 @@ -<TABLE ID='snps' BORDER=1> -<TR> -<TH>CHROM -<TH>POS -<TH>TYPE -<TH>REF -<TH>ALT -<TH>EVIDENCE -<TH>FTYPE -<TH>STRAND -<TH>NT_POS -<TH>AA_POS -<TH>EFFECT -<TH>LOCUS_TAG -<TH>GENE -<TH>PRODUCT -<TR> -<TD>reference -<TD>48 -<TD>snp -<TD>A -<TD>C -<TD>C:2 A:0 -</TABLE>
--- a/test-data/c/snps.log Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,783 +0,0 @@ -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --outdir c --ref reference.fasta --R1 c_1.fastq --R2 c_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.070 sec; CPU: 0.003 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:c\tSM:c' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.13651. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.13651. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.13651. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 10 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 10 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=100 && FMT/DP>=10 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 0 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi - -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --force --outdir c --ref reference.fasta --R1 c_1.fastq --R2 c_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.070 sec; CPU: 0.003 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:c\tSM:c' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.14000. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.14000. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.14000. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=100 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 0 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi - -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --force --outdir c --ref reference.fasta --R1 c_1.fastq --R2 c_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.070 sec; CPU: 0.003 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:c\tSM:c' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.14733. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.14733. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.14733. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=100 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 0 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi - -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --minqual 20 --force --outdir c --ref reference.fasta --R1 c_1.fastq --R2 c_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.070 sec; CPU: 0.004 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:c\tSM:c' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.15004. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.15004. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.15004. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=20 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 1 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi - -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --minqual 60 --force --outdir c --ref reference.fasta --R1 c_1.fastq --R2 c_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.071 sec; CPU: 0.003 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:c\tSM:c' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.17443. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.17443. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.17443. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 1 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi - -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --force --outdir c --ref reference.fasta --R1 c_1.fastq --R2 c_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.070 sec; CPU: 0.003 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:c\tSM:c' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.25837. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.25837. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.25837. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=100 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 0 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi - -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --minqual 80 --force --outdir c --ref reference.fasta --R1 c_1.fastq --R2 c_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.070 sec; CPU: 0.003 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:c\tSM:c' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.26070. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.26070. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.26070. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=80 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 0 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi - -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --minqual 65 --force --outdir c --ref reference.fasta --R1 c_1.fastq --R2 c_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.071 sec; CPU: 0.003 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:c\tSM:c' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.26290. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.26290. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.26290. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=65 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 0 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi - -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --minqual 60 --force --outdir c --ref reference.fasta --R1 c_1.fastq --R2 c_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.070 sec; CPU: 0.003 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:c\tSM:c' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.26511. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.26511. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.26511. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 1 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi -
--- a/test-data/c/snps.raw.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,62 +0,0 @@ -##fileformat=VCFv4.2 -##fileDate=20190206 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype"> -##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> -##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> -##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally"> -##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally"> -##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> -##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> -##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations"> -##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations"> -##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand"> -##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand"> -##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand"> -##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand"> -##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> -##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome"> -##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele"> -##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele"> -##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without."> -##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best."> -##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout."> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."> -##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position."> -##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."> -##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length"> -##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles"> -##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles"> -##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments"> -##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments"> -##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> -##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT c -reference 48 . A C 63.8794 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=7.37776;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=80;QR=0;RO=0;RPL=2;RPP=7.35324;RPPR=0;RPR=0;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 1/1:2:0,2:0:0:2:80:-7.59179,-0.60206,0
--- a/test-data/c/snps.subs.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -##fileformat=VCFv4.2 -##snippy="snippy-vcf_extract_subs snps.filt.vcf" -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20190206 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##bcftools_viewVersion=1.9+htslib-1.9 -##bcftools_viewCommand=view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf; Date=Wed Feb 6 12:42:19 2019 -##bcftools_annotateVersion=1.9+htslib-1.9 -##bcftools_annotateCommand=annotate --remove ^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL; Date=Wed Feb 6 12:42:19 2019 -##INFO=<ID=OLDVAR,Number=R,Type=String,Description="Original REF,ALT before decomposition"> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT c -reference 48 . A C 63.8794 . TYPE=snp;DP=2;RO=0;AO=2 GT:DP:RO:AO:QR:QA 1/1:2:0:2:0:80
--- a/test-data/c/snps.tab Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -CHROM POS TYPE REF ALT EVIDENCE FTYPE STRAND NT_POS AA_POS EFFECT LOCUS_TAG GENE PRODUCT -reference 48 snp A C C:2 A:0
--- a/test-data/c/snps.txt Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -DateTime 2019-02-06T12:42:18 -ReadFiles /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_2.fastq -Reference /home/dfornika/Code/tools-iuc/tools/snippy/test-data/reference.fasta -ReferenceSize 700 -Software snippy 4.3.6 -Variant-SNP 1 -VariantTotal 1
--- a/test-data/c/snps.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20190206 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> -##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> -##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##bcftools_viewVersion=1.9+htslib-1.9 -##bcftools_viewCommand=view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf; Date=Wed Feb 6 12:42:19 2019 -##bcftools_annotateVersion=1.9+htslib-1.9 -##bcftools_annotateCommand=annotate --remove ^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL; Date=Wed Feb 6 12:42:19 2019 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT c -reference 48 . A C 63.8794 . AB=0;AO=2;DP=2;QA=80;QR=0;RO=0;TYPE=snp GT:DP:RO:QR:AO:QA:GL 1/1:2:0:0:2:80:-7.59179,-0.60206,0
--- a/test-data/c_gbk/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/c_gbk/ref.fa.fai Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference 700 49 60 61
--- a/test-data/c_gbk/reference/genomes/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference Listeria monocytogenes strain strain. -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/c_gbk/reference/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/c_gbk/reference/ref.fa.amb Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -700 1 0
--- a/test-data/c_gbk/reference/ref.fa.ann Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -700 1 11 -0 reference Listeria monocytogenes strain strain. -0 700 0
--- a/test-data/c_gbk/reference/ref.fa.fai Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference 700 49 60 61
--- a/test-data/c_gbk/reference/ref.gff Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -##gff-version 3 -reference snippy CDS 188 658 . + 0 ID=KPFNHDNB_00001;codon_start=1;inference=ab initio prediction:Prodigal:2.6;locus_tag=KPFNHDNB_00001;product=hypothetical protein;transl_table=11;translation=MIFQRLLKTRDTEFYRVIQNRNIDDVFGYLLIHDKREPAEIDDFKVFAKSNINKEAFSVNIKKNHIYTMFFHFTDLEEEQEIPKFTKVIRFIEGLLSFQPETSHYVDNYLIKEKLIFEYPAEFEKIGEFAKYLVKLSGRKITIPDTTREKYIYLTQ
--- a/test-data/c_gbk/reference/ref.txt Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference:0-700
--- a/test-data/c_gbk/reference/snpeff.config Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -codon.Standard : TTT/F, TTC/F, TTA/L, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Vertebrate_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/M+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/*, AGG/*, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Yeast_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/T, CTC/T, CTA/T, CTG/T, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/M+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Mold_Mitochondrial : TTT/F, TTC/F, TTA/L+, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/I+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Protozoan_Mitochondrial : TTT/F, TTC/F, TTA/L+, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/I+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Coelenterate : TTT/F, TTC/F, TTA/L+, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/I+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Mitochondrial : TTT/F, TTC/F, TTA/L+, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/I+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Mycoplasma : TTT/F, TTC/F, TTA/L+, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/I+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Spiroplasma : TTT/F, TTC/F, TTA/L+, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/I+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Invertebrate_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/M+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/S, AGG/S, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Ciliate_Nuclear : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/Q, TAG/Q, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Dasycladacean_Nuclear : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/Q, TAG/Q, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Hexamita_Nuclear : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/Q, TAG/Q, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Echinoderm_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/N, AAG/K, AGT/S, AGC/S, AGA/S, AGG/S, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Flatworm_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/N, AAG/K, AGT/S, AGC/S, AGA/S, AGG/S, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Euplotid_Nuclear : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/C, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Bacterial_and_Plant_Plastid : TTT/F, TTC/F, TTA/L, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I+, ATA/I+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Alternative_Yeast_Nuclear : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/S+, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Ascidian_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L+, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/M+, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/G, AGG/G, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Alternative_Flatworm_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/Y, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/N, AAG/K, AGT/S, AGC/S, AGA/S, AGG/S, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Blepharisma_Macronuclear : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/Q, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Chlorophycean_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/L, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Trematode_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/W, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/M, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/N, AAG/K, AGT/S, AGC/S, AGA/S, AGG/S, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Scenedesmus_obliquus_Mitochondrial : TTT/F, TTC/F, TTA/L, TTG/L, TCT/S, TCC/S, TCA/*, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/L, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -codon.Thraustochytrium_Mitochondrial : TTT/F, TTC/F, TTA/*, TTG/L, TCT/S, TCC/S, TCA/S, TCG/S, TAT/Y, TAC/Y, TAA/*, TAG/*, TGT/C, TGC/C, TGA/*, TGG/W, CTT/L, CTC/L, CTA/L, CTG/L, CCT/P, CCC/P, CCA/P, CCG/P, CAT/H, CAC/H, CAA/Q, CAG/Q, CGT/R, CGC/R, CGA/R, CGG/R, ATT/I+, ATC/I, ATA/I, ATG/M+, ACT/T, ACC/T, ACA/T, ACG/T, AAT/N, AAC/N, AAA/K, AAG/K, AGT/S, AGC/S, AGA/R, AGG/R, GTT/V, GTC/V, GTA/V, GTG/V+, GCT/A, GCC/A, GCA/A, GCG/A, GAT/D, GAC/D, GAA/E, GAG/E, GGT/G, GGC/G, GGA/G, GGG/G -ref.genome : Snippy Reference - ref.chromosome : reference - ref.reference.codonTable : Bacterial_and_Plant_Plastid
--- a/test-data/c_gbk/snps.filt.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20190206 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> -##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> -##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##bcftools_viewVersion=1.9+htslib-1.9 -##bcftools_viewCommand=view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf; Date=Wed Feb 6 12:30:20 2019 -##bcftools_annotateVersion=1.9+htslib-1.9 -##bcftools_annotateCommand=annotate --remove ^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL; Date=Wed Feb 6 12:30:20 2019 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT c_gbk -reference 48 . A C 63.8794 . AB=0;AO=2;DP=2;QA=80;QR=0;RO=0;TYPE=snp GT:DP:RO:QR:AO:QA:GL 1/1:2:0:0:2:80:-7.59179,-0.60206,0
--- a/test-data/c_gbk/snps.log Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,56 +0,0 @@ -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --mincov 2 --minqual 60 --force --outdir c_gbk --ref reference.gbk --R1 c_1.fastq --R2 c_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.071 sec; CPU: 0.004 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### snpEff build -c reference/snpeff.config -dataDir . -gff3 ref - - -### bwa mem -Y -M -R '@RG\tID:c_gbk\tSM:c_gbk' -t 8 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/c_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.24822. --threads 8 -m 1000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.24822. --threads 8 -m 1000M | samtools markdup -T /tmp/snippy.24822. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 8 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=60 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### snpEff ann -noLog -noStats -no-downstream -no-upstream -no-utr -t -c reference/snpeff.config -dataDir . ref snps.filt.vcf > snps.vcf -
--- a/test-data/c_gbk/snps.raw.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,62 +0,0 @@ -##fileformat=VCFv4.2 -##fileDate=20190206 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype"> -##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> -##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> -##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally"> -##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally"> -##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> -##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> -##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations"> -##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations"> -##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand"> -##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand"> -##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand"> -##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand"> -##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> -##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome"> -##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele"> -##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele"> -##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without."> -##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best."> -##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout."> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."> -##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position."> -##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."> -##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length"> -##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles"> -##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles"> -##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments"> -##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments"> -##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> -##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT c_gbk -reference 48 . A C 63.8794 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=7.37776;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=80;QR=0;RO=0;RPL=2;RPP=7.35324;RPPR=0;RPR=0;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 1/1:2:0,2:0:0:2:80:-7.59179,-0.60206,0
--- a/test-data/fna_ref_b_testing/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/fna_ref_b_testing/ref.fa.fai Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference 700 11 60 61
--- a/test-data/fna_ref_b_testing/reference/genomes/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/fna_ref_b_testing/reference/ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/fna_ref_b_testing/reference/ref.fa.amb Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -700 1 0
--- a/test-data/fna_ref_b_testing/reference/ref.fa.ann Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -700 1 11 -0 reference (null) -0 700 0
--- a/test-data/fna_ref_b_testing/reference/ref.fa.fai Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference 700 11 60 61
--- a/test-data/fna_ref_b_testing/reference/ref.txt Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference:0-700
--- a/test-data/fna_ref_b_testing/snps.aligned.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCTCAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACT--------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -----------------------------------------
--- a/test-data/fna_ref_b_testing/snps.bed Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference 3 4
--- a/test-data/fna_ref_b_testing/snps.consensus.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCTCAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/fna_ref_b_testing/snps.consensus.subs.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCTCAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/fna_ref_b_testing/snps.csv Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -CHROM,POS,TYPE,REF,ALT,EVIDENCE,FTYPE,STRAND,NT_POS,AA_POS,EFFECT,LOCUS_TAG,GENE,PRODUCT -reference,4,snp,A,T,T:2 A:0
--- a/test-data/fna_ref_b_testing/snps.filt.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20190211 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> -##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> -##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##bcftools_viewVersion=1.9+htslib-1.9 -##bcftools_viewCommand=view --include 'FMT/GT="1/1" && QUAL>=60.0 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0.9' snps.raw.vcf; Date=Mon Feb 11 11:46:35 2019 -##bcftools_annotateVersion=1.9+htslib-1.9 -##bcftools_annotateCommand=annotate --remove ^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL; Date=Mon Feb 11 11:46:35 2019 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT fna_ref_b_testing -reference 4 . A T 63.8794 . AB=0;AO=2;DP=2;QA=80;QR=0;RO=0;TYPE=snp GT:DP:RO:QR:AO:QA:GL 1/1:2:0:0:2:80:-7.59179,-0.60206,0
--- a/test-data/fna_ref_b_testing/snps.gff Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -##gff-version 3 -reference snippy:4.3.6 variation 4 4 . . 0 note=snp A=>T T:2 A:0
--- a/test-data/fna_ref_b_testing/snps.html Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,24 +0,0 @@ -<TABLE ID='snps' BORDER=1> -<TR> -<TH>CHROM -<TH>POS -<TH>TYPE -<TH>REF -<TH>ALT -<TH>EVIDENCE -<TH>FTYPE -<TH>STRAND -<TH>NT_POS -<TH>AA_POS -<TH>EFFECT -<TH>LOCUS_TAG -<TH>GENE -<TH>PRODUCT -<TR> -<TD>reference -<TD>4 -<TD>snp -<TD>A -<TD>T -<TD>T:2 A:0 -</TABLE>
--- a/test-data/fna_ref_b_testing/snps.log Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,87 +0,0 @@ -### cd /home/dfornika/Code/tools-iuc/tools/snippy/test-data - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy --outdir fna_ref_b_testing --cpus 1 --ref reference.fasta --mapqual 60 --mincov 2 --minfrac 0.9 --minqual 60.0 --R1 b_1.fastq --R2 b_2.fastq - -### samtools faidx reference/ref.fa - - -### bwa index reference/ref.fa - -[bwa_index] Pack FASTA... 0.00 sec -[bwa_index] Construct BWT for the packed sequence... -[bwa_index] 0.00 seconds elapse. -[bwa_index] Update BWT... 0.00 sec -[bwa_index] Pack forward-only FASTA... 0.00 sec -[bwa_index] Construct SA from BWT and Occ... 0.00 sec -[main] Version: 0.7.17-r1188 -[main] CMD: bwa index reference/ref.fa -[main] Real time: 0.009 sec; CPU: 0.003 sec - -### mkdir -p reference/genomes && cp -f reference/ref.fa reference/genomes/ref.fa - - -### ln -sf reference/ref.fa . - - -### ln -sf reference/ref.fa.fai . - - -### mkdir -p reference/ref && gzip -c reference/ref.gff > reference/ref/genes.gff.gz - - -### bwa mem -Y -M -R '@RG\tID:fna_ref_b_testing\tSM:fna_ref_b_testing' -t 1 reference/ref.fa /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_2.fastq | samclip --max 10 --ref reference/ref.fa.fai | samtools sort -n -l 0 -T /tmp/snippy.8702. --threads 1 -m 8000M | samtools fixmate -m - - | samtools sort -l 0 -T /tmp/snippy.8702. --threads 1 -m 8000M | samtools markdup -T /tmp/snippy.8702. -r -s - - > snps.bam - -READ 10 WRITTEN 2 -EXCLUDED 0 EXAMINED 10 -PAIRED 10 SINGLE 0 -DULPICATE PAIR 8 DUPLICATE SINGLE 0 -DUPLICATE TOTAL 8 - -### samtools index snps.bam - - -### fasta_generate_regions.py reference/ref.fa.fai 1000 > reference/ref.txt - - -### freebayes-parallel reference/ref.txt 1 -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam > snps.raw.vcf - - -### bcftools view --include 'FMT/GT="1/1" && QUAL>=60.0 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0.9' snps.raw.vcf | vt normalize -r reference/ref.fa - | bcftools annotate --remove '^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL' > snps.filt.vcf - - -### cp snps.filt.vcf snps.vcf - - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_to_tab --gff reference/ref.gff --ref reference/ref.fa --vcf snps.vcf > snps.tab - -Loading reference: reference/ref.fa -Loaded 1 sequences. -Loading features: reference/ref.gff -Parsing variants: snps.vcf -Converted 1 SNPs to TAB format. - -### /home/dfornika/miniconda3/envs/snippy-4.3.6/bin/snippy-vcf_extract_subs snps.filt.vcf > snps.subs.vcf - - -### bcftools convert -Oz -o snps.vcf.gz snps.vcf - - -### bcftools index -f snps.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.fa snps.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### bcftools convert -Oz -o snps.subs.vcf.gz snps.subs.vcf - - -### bcftools index -f snps.subs.vcf.gz - - -### bcftools consensus -f reference/ref.fa -o snps.consensus.subs.fa snps.subs.vcf.gz - -Note: the --sample option not given, applying all records regardless of the genotype - -### rm -f snps.subs.vcf.gz snps.subs.vcf.gz.csi snps.subs.vcf.gz.tbi -
--- a/test-data/fna_ref_b_testing/snps.raw.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,62 +0,0 @@ -##fileformat=VCFv4.2 -##fileDate=20190211 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype"> -##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> -##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> -##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally"> -##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally"> -##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> -##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> -##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations"> -##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations"> -##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand"> -##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand"> -##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand"> -##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand"> -##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> -##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome"> -##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele"> -##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele"> -##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> -##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without."> -##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best."> -##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout."> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."> -##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position."> -##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."> -##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length"> -##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles"> -##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles"> -##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments"> -##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments"> -##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> -##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT fna_ref_b_testing -reference 4 . A T 63.8794 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=7.37776;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=80;QR=0;RO=0;RPL=0;RPP=7.35324;RPPR=0;RPR=2;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 1/1:2:0,2:0:0:2:80:-7.59179,-0.60206,0
--- a/test-data/fna_ref_b_testing/snps.subs.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -##fileformat=VCFv4.2 -##snippy="snippy-vcf_extract_subs snps.filt.vcf" -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20190211 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##bcftools_viewVersion=1.9+htslib-1.9 -##bcftools_viewCommand=view --include 'FMT/GT="1/1" && QUAL>=60.0 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0.9' snps.raw.vcf; Date=Mon Feb 11 11:46:35 2019 -##bcftools_annotateVersion=1.9+htslib-1.9 -##bcftools_annotateCommand=annotate --remove ^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL; Date=Mon Feb 11 11:46:35 2019 -##INFO=<ID=OLDVAR,Number=R,Type=String,Description="Original REF,ALT before decomposition"> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT fna_ref_b_testing -reference 4 . A T 63.8794 . TYPE=snp;DP=2;RO=0;AO=2 GT:DP:RO:AO:QR:QA 1/1:2:0:2:0:80
--- a/test-data/fna_ref_b_testing/snps.tab Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -CHROM POS TYPE REF ALT EVIDENCE FTYPE STRAND NT_POS AA_POS EFFECT LOCUS_TAG GENE PRODUCT -reference 4 snp A T T:2 A:0
--- a/test-data/fna_ref_b_testing/snps.txt Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -DateTime 2019-02-11T11:46:34 -ReadFiles /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_1.fastq /home/dfornika/Code/tools-iuc/tools/snippy/test-data/b_2.fastq -Reference /home/dfornika/Code/tools-iuc/tools/snippy/test-data/reference.fasta -ReferenceSize 700 -Software snippy 4.3.6 -Variant-SNP 1 -VariantTotal 1
--- a/test-data/fna_ref_b_testing/snps.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20190211 -##source=freeBayes v1.2.0-dirty -##reference=reference/ref.fa -##contig=<ID=reference,length=700> -##phasing=none -##commandline="freebayes -p 2 -P 0 -C 2 --min-repeat-entropy 1.5 --strict-vcf -q 13 -m 60 --min-coverage 2 -F 0.05 -f reference/ref.fa snps.bam --region reference:0-700" -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> -##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> -##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> -##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> -##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> -##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> -##bcftools_viewVersion=1.9+htslib-1.9 -##bcftools_viewCommand=view --include 'FMT/GT="1/1" && QUAL>=60.0 && FMT/DP>=2 && (FMT/AO)/(FMT/DP)>=0.9' snps.raw.vcf; Date=Mon Feb 11 11:46:35 2019 -##bcftools_annotateVersion=1.9+htslib-1.9 -##bcftools_annotateCommand=annotate --remove ^INFO/TYPE,^INFO/DP,^INFO/RO,^INFO/AO,^INFO/AB,^FORMAT/GT,^FORMAT/DP,^FORMAT/RO,^FORMAT/AO,^FORMAT/QR,^FORMAT/QA,^FORMAT/GL; Date=Mon Feb 11 11:46:35 2019 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT fna_ref_b_testing -reference 4 . A T 63.8794 . AB=0;AO=2;DP=2;QA=80;QR=0;RO=0;TYPE=snp GT:DP:RO:QR:AO:QA:GL 1/1:2:0:0:2:80:-7.59179,-0.60206,0
--- a/test-data/prokka-out/PROKKA_02062019.err Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,65 +0,0 @@ -Discrepancy Report Results - -Summary -DISC_PROTEIN_NAMES:All proteins have same name "hypothetical protein" -FATAL: MISSING_PROTEIN_ID:1 proteins have invalid IDs. -DISC_SOURCE_QUALS_ASNDISC:strain (all present, all unique) -DISC_SOURCE_QUALS_ASNDISC:taxname (all present, all unique) -DISC_FEATURE_COUNT:CDS: 1 present -DISC_COUNT_NUCLEOTIDES:1 nucleotide Bioseqs are present -FEATURE_LOCATION_CONFLICT:1 features have inconsistent gene locations. -DISC_QUALITY_SCORES:Quality scores are missing on all sequences. -ONCALLER_COMMENT_PRESENT:1 comment descriptors were found (all same) -MISSING_GENOMEASSEMBLY_COMMENTS:1 bioseqs are missing GenomeAssembly structured comments -MOLTYPE_NOT_MRNA:1 molecule types are not set as mRNA. -TECHNIQUE_NOT_TSA:1 technique are not set as TSA -MISSING_STRUCTURED_COMMENT:1 sequences do not include structured comments. -MISSING_PROJECT:2 sequences do not include project. -DISC_INCONSISTENT_MOLINFO_TECH:Molinfo Technique Report (some missing, all same) - - -Detailed Report - -DiscRep_ALL:DISC_PROTEIN_NAMES::All proteins have same name "hypothetical protein" - -FATAL: DiscRep_ALL:MISSING_PROTEIN_ID::1 proteins have invalid IDs. -prokka-out/PROKKA_02062019:reference_1 (length 156) - -DiscRep_ALL:DISC_SOURCE_QUALS_ASNDISC::strain (all present, all unique) -DiscRep_SUB:DISC_SOURCE_QUALS_ASNDISC::1 sources have unique values for strain -DiscRep_ALL:DISC_SOURCE_QUALS_ASNDISC::taxname (all present, all unique) -DiscRep_SUB:DISC_SOURCE_QUALS_ASNDISC::1 sources have unique values for taxname -DiscRep_ALL:DISC_FEATURE_COUNT::CDS: 1 present -DiscRep_ALL:DISC_COUNT_NUCLEOTIDES::1 nucleotide Bioseqs are present -prokka-out/PROKKA_02062019:reference (length 700) - -DiscRep_ALL:FEATURE_LOCATION_CONFLICT::1 features have inconsistent gene locations. -DiscRep_SUB:FEATURE_LOCATION_CONFLICT::Coding region xref gene does not exist -prokka-out/PROKKA_02062019:CDS hypothetical protein reference:188-658 KPFNHDNB_00001 - -DiscRep_ALL:DISC_QUALITY_SCORES::Quality scores are missing on all sequences. - -DiscRep_ALL:ONCALLER_COMMENT_PRESENT::1 comment descriptors were found (all same) -prokka-out/PROKKA_02062019:reference:Annotated using prokka 1.13.3 from https://github.com/tseemann/prokka - -DiscRep_ALL:MISSING_GENOMEASSEMBLY_COMMENTS::1 bioseqs are missing GenomeAssembly structured comments -prokka-out/PROKKA_02062019:reference (length 700) - -DiscRep_ALL:MOLTYPE_NOT_MRNA::1 molecule types are not set as mRNA. -prokka-out/PROKKA_02062019:reference (length 700) - -DiscRep_ALL:TECHNIQUE_NOT_TSA::1 technique are not set as TSA -prokka-out/PROKKA_02062019:reference (length 700) - -DiscRep_ALL:MISSING_STRUCTURED_COMMENT::1 sequences do not include structured comments. -prokka-out/PROKKA_02062019:reference (length 700) - -DiscRep_ALL:MISSING_PROJECT::2 sequences do not include project. -prokka-out/PROKKA_02062019:reference (length 700) -prokka-out/PROKKA_02062019:reference_1 (length 156) - -DiscRep_ALL:DISC_INCONSISTENT_MOLINFO_TECH::Molinfo Technique Report (some missing, all same) -DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::technique (all missing) -DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::1 Molinfos are missing field technique -prokka-out/PROKKA_02062019:reference (length 700) -
--- a/test-data/prokka-out/PROKKA_02062019.faa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ ->KPFNHDNB_00001 hypothetical protein -MIFQRLLKTRDTEFYRVIQNRNIDDVFGYLLIHDKREPAEIDDFKVFAKSNINKEAFSVN -IKKNHIYTMFFHFTDLEEEQEIPKFTKVIRFIEGLLSFQPETSHYVDNYLIKEKLIFEYP -AEFEKIGEFAKYLVKLSGRKITIPDTTREKYIYLTQ
--- a/test-data/prokka-out/PROKKA_02062019.ffn Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ ->KPFNHDNB_00001 hypothetical protein -ATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTATACAAAAC -AGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACCAGCAGAA -ATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTCAGTGAAT -ATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGAAGAACAG -GAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTTTCAGCCA -GAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGAATATCCT -GCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGGTCGTAAA -ATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAA
--- a/test-data/prokka-out/PROKKA_02062019.fna Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/prokka-out/PROKKA_02062019.fsa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ ->reference [gcode=11] [organism=Listeria monocytogenes] [strain=strain] -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/prokka-out/PROKKA_02062019.gbk Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ -LOCUS reference 700 bp DNA linear 06-FEB-2019 -DEFINITION Listeria monocytogenes strain strain. -ACCESSION -VERSION -KEYWORDS . -SOURCE Listeria monocytogenes - ORGANISM Listeria monocytogenes - Unclassified. -COMMENT Annotated using prokka 1.13.3 from - https://github.com/tseemann/prokka. -FEATURES Location/Qualifiers - source 1..700 - /organism="Listeria monocytogenes" - /mol_type="genomic DNA" - /strain="strain" - CDS 188..658 - /locus_tag="KPFNHDNB_00001" - /inference="ab initio prediction:Prodigal:2.6" - /codon_start=1 - /transl_table=11 - /product="hypothetical protein" - /translation="MIFQRLLKTRDTEFYRVIQNRNIDDVFGYLLIHDKREPAEIDDF - KVFAKSNINKEAFSVNIKKNHIYTMFFHFTDLEEEQEIPKFTKVIRFIEGLLSFQPET - SHYVDNYLIKEKLIFEYPAEFEKIGEFAKYLVKLSGRKITIPDTTREKYIYLTQ" -ORIGIN - 1 tccacaagcc attgtgtgta attaaccact aattgtgtat aagtttaaac taattgaaaa - 61 ggttatccac aataaaaagg cgttattcag gagttatcca cactttctag gaaaggattt - 121 cattgcgcca atgtgttaaa ctatttaccg aatacgaaaa aaagacaaat aaatgaggtt - 181 gtgaaaaatg atatttcaac ggcttttgaa aactagagat acagagtttt atcgagttat - 241 acaaaacagg aatattgacg acgtatttgg atacttatta attcacgata aacgggaacc - 301 agcagaaatt gacgatttta aggtatttgc aaaaagtaat ataaataaag aagctttttc - 361 agtgaatatc aaaaaaaatc atatttacac gatgtttttc cactttactg atttagagga - 421 agaacaggaa attccaaaat ttactaaagt tattcgtttt atagaaggac ttttatcttt - 481 tcagccagaa acaagccatt acgttgataa ctatttaata aaggaaaaac taatttttga - 541 atatcctgct gaatttgaga aaatcgggga gtttgctaaa tatttagtaa agctttcggg - 601 tcgtaaaatt actattccag acacaacgag agaaaaatat atctatttaa cgcaataatt - 661 ttcgaaaaat ggtttttctc tctataaaaa tatgatatga -//
--- a/test-data/prokka-out/PROKKA_02062019.gff Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,17 +0,0 @@ -##gff-version 3 -##sequence-region reference 1 700 -reference Prodigal:2.6 CDS 188 658 . + 0 ID=KPFNHDNB_00001;inference=ab initio prediction:Prodigal:2.6;locus_tag=KPFNHDNB_00001;product=hypothetical protein -##FASTA ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/prokka-out/PROKKA_02062019.log Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,126 +0,0 @@ -[11:29:26] This is prokka 1.13.3 -[11:29:26] Written by Torsten Seemann <torsten.seemann@gmail.com> -[11:29:26] Homepage is https://github.com/tseemann/prokka -[11:29:26] Local time is Wed Feb 6 11:29:26 2019 -[11:29:26] You are dfornika -[11:29:26] Operating system is linux -[11:29:26] You have BioPerl 1.007002 -[11:29:26] System has 8 cores. -[11:29:26] Will use maximum of 8 cores. -[11:29:26] Annotating as >>> Bacteria <<< -[11:29:26] Generating locus_tag from 'reference.fasta' contents. -[11:29:26] Setting --locustag KPFNHDNB from MD5 49f71d7b5d5a34956f52c337db5c3269 -[11:29:26] Re-using existing --outdir prokka-out -[11:29:26] Using filename prefix: PROKKA_02062019.XXX -[11:29:26] Setting HMMER_NCPU=1 -[11:29:26] Writing log to: prokka-out/PROKKA_02062019.log -[11:29:26] Command: /home/dfornika/miniconda3/envs/prokka-1.13/bin/prokka reference.fasta --genus Listeria --species monocytogenes --outdir prokka-out --force -[11:29:26] Appending to PATH: /home/dfornika/miniconda3/envs/prokka-1.13/bin -[11:29:26] Looking for 'aragorn' - found /home/dfornika/miniconda3/envs/prokka-1.13/bin/aragorn -[11:29:26] Determined aragorn version is 1.2 -[11:29:26] Looking for 'barrnap' - found /home/dfornika/miniconda3/envs/prokka-1.13/bin/barrnap -[11:29:26] Determined barrnap version is 0.9 -[11:29:26] Looking for 'blastp' - found /home/dfornika/miniconda3/envs/prokka-1.13/bin/blastp -[11:29:26] Determined blastp version is 2.7 -[11:29:26] Looking for 'cmpress' - found /home/dfornika/miniconda3/envs/prokka-1.13/bin/cmpress -[11:29:26] Determined cmpress version is 1.1 -[11:29:26] Looking for 'cmscan' - found /home/dfornika/miniconda3/envs/prokka-1.13/bin/cmscan -[11:29:26] Determined cmscan version is 1.1 -[11:29:26] Looking for 'egrep' - found /usr/bin/egrep -[11:29:26] Looking for 'find' - found /usr/bin/find -[11:29:26] Looking for 'grep' - found /usr/bin/grep -[11:29:26] Looking for 'hmmpress' - found /home/dfornika/miniconda3/envs/prokka-1.13/bin/hmmpress -[11:29:26] Determined hmmpress version is 3.2 -[11:29:26] Looking for 'hmmscan' - found /home/dfornika/miniconda3/envs/prokka-1.13/bin/hmmscan -[11:29:26] Determined hmmscan version is 3.2 -[11:29:26] Looking for 'java' - found /home/dfornika/miniconda3/envs/prokka-1.13/bin/java -[11:29:26] Looking for 'less' - found /usr/bin/less -[11:29:26] Looking for 'makeblastdb' - found /home/dfornika/miniconda3/envs/prokka-1.13/bin/makeblastdb -[11:29:26] Determined makeblastdb version is 2.7 -[11:29:26] Looking for 'minced' - found /home/dfornika/miniconda3/envs/prokka-1.13/bin/minced -[11:29:26] Determined minced version is 3.2 -[11:29:26] Looking for 'parallel' - found /home/dfornika/miniconda3/envs/prokka-1.13/bin/parallel -[11:29:26] Determined parallel version is 20181022 -[11:29:26] Looking for 'prodigal' - found /home/dfornika/miniconda3/envs/prokka-1.13/bin/prodigal -[11:29:26] Determined prodigal version is 2.6 -[11:29:26] Looking for 'prokka-genbank_to_fasta_db' - found /home/dfornika/miniconda3/envs/prokka-1.13/bin/prokka-genbank_to_fasta_db -[11:29:26] Looking for 'sed' - found /usr/bin/sed -[11:29:26] Looking for 'tbl2asn' - found /home/dfornika/miniconda3/envs/prokka-1.13/bin/tbl2asn -[11:29:26] Determined tbl2asn version is 25.6 -[11:29:26] Using genetic code table 11. -[11:29:26] Loading and checking input file: reference.fasta -[11:29:26] Wrote 1 contigs totalling 700 bp. -[11:29:26] Predicting tRNAs and tmRNAs -[11:29:26] Running: aragorn -l -gc11 -w prokka\-out\/PROKKA_02062019\.fna -[11:29:26] Found 0 tRNAs -[11:29:26] Predicting Ribosomal RNAs -[11:29:26] Running Barrnap with 8 threads -[11:29:26] Found 0 rRNAs -[11:29:26] Skipping ncRNA search, enable with --rfam if desired. -[11:29:26] Total of 0 tRNA + rRNA features -[11:29:26] Searching for CRISPR repeats -[11:29:26] Found 0 CRISPRs -[11:29:26] Predicting coding sequences -[11:29:26] Contigs total 700 bp, so using meta mode -[11:29:26] Running: prodigal -i prokka\-out\/PROKKA_02062019\.fna -c -m -g 11 -p meta -f sco -q -[11:29:26] Found 1 CDS -[11:29:26] Connecting features back to sequences -[11:29:26] Not using genus-specific database. Try --usegenus to enable it. -[11:29:26] Annotating CDS, please be patient. -[11:29:26] Will use 8 CPUs for similarity searching. -[11:29:26] There are still 1 unannotated CDS left (started with 1) -[11:29:26] Will use blast to search against /home/dfornika/miniconda3/envs/prokka-1.13/db/kingdom/Bacteria/IS with 8 CPUs -[11:29:26] Running: cat prokka\-out\/IS\.faa | parallel --gnu --plain -j 8 --block 10 --recstart '>' --pipe blastp -query - -db /home/dfornika/miniconda3/envs/prokka-1.13/db/kingdom/Bacteria/IS -evalue 1e-30 -qcov_hsp_perc 90 -num_threads 1 -num_descriptions 1 -num_alignments 1 -seg no > prokka\-out\/IS\.blast 2> /dev/null -[11:29:27] Deleting unwanted file: prokka-out/IS.faa -[11:29:27] Deleting unwanted file: prokka-out/IS.blast -[11:29:27] There are still 1 unannotated CDS left (started with 1) -[11:29:27] Will use blast to search against /home/dfornika/miniconda3/envs/prokka-1.13/db/kingdom/Bacteria/AMR with 8 CPUs -[11:29:27] Running: cat prokka\-out\/AMR\.faa | parallel --gnu --plain -j 8 --block 10 --recstart '>' --pipe blastp -query - -db /home/dfornika/miniconda3/envs/prokka-1.13/db/kingdom/Bacteria/AMR -evalue 9.99999999999999e-301 -qcov_hsp_perc 90 -num_threads 1 -num_descriptions 1 -num_alignments 1 -seg no > prokka\-out\/AMR\.blast 2> /dev/null -[11:29:27] Deleting unwanted file: prokka-out/AMR.faa -[11:29:27] Deleting unwanted file: prokka-out/AMR.blast -[11:29:27] There are still 1 unannotated CDS left (started with 1) -[11:29:27] Will use blast to search against /home/dfornika/miniconda3/envs/prokka-1.13/db/kingdom/Bacteria/sprot with 8 CPUs -[11:29:27] Running: cat prokka\-out\/sprot\.faa | parallel --gnu --plain -j 8 --block 10 --recstart '>' --pipe blastp -query - -db /home/dfornika/miniconda3/envs/prokka-1.13/db/kingdom/Bacteria/sprot -evalue 1e-09 -qcov_hsp_perc 80 -num_threads 1 -num_descriptions 1 -num_alignments 1 -seg no > prokka\-out\/sprot\.blast 2> /dev/null -[11:29:27] Deleting unwanted file: prokka-out/sprot.faa -[11:29:27] Deleting unwanted file: prokka-out/sprot.blast -[11:29:27] There are still 1 unannotated CDS left (started with 1) -[11:29:27] Will use hmmer3 to search against /home/dfornika/miniconda3/envs/prokka-1.13/db/hmm/HAMAP.hmm with 8 CPUs -[11:29:27] Running: cat prokka\-out\/HAMAP\.hmm\.faa | parallel --gnu --plain -j 8 --block 10 --recstart '>' --pipe hmmscan --noali --notextw --acc -E 1e-09 --cpu 1 /home/dfornika/miniconda3/envs/prokka-1.13/db/hmm/HAMAP.hmm /dev/stdin > prokka\-out\/HAMAP\.hmm\.hmmer3 2> /dev/null -[11:29:28] Deleting unwanted file: prokka-out/HAMAP.hmm.faa -[11:29:28] Deleting unwanted file: prokka-out/HAMAP.hmm.hmmer3 -[11:29:28] Labelling remaining 1 proteins as 'hypothetical protein' -[11:29:28] Found 0 unique /gene codes. -[11:29:28] Fixed 0 colliding /gene names. -[11:29:28] Adding /locus_tag identifiers -[11:29:28] Assigned 1 locus_tags to CDS and RNA features. -[11:29:28] Writing outputs to prokka-out/ -[11:29:28] Generating annotation statistics file -[11:29:28] Generating Genbank and Sequin files -[11:29:28] Running: tbl2asn -V b -a r10k -l paired-ends -M n -N 1 -y 'Annotated using prokka 1.13.3 from https://github.com/tseemann/prokka' -Z prokka\-out\/PROKKA_02062019\.err -i prokka\-out\/PROKKA_02062019\.fsa 2> /dev/null -[11:29:28] Deleting unwanted file: prokka-out/errorsummary.val -[11:29:28] Deleting unwanted file: prokka-out/PROKKA_02062019.dr -[11:29:28] Deleting unwanted file: prokka-out/PROKKA_02062019.fixedproducts -[11:29:28] Deleting unwanted file: prokka-out/PROKKA_02062019.ecn -[11:29:28] Deleting unwanted file: prokka-out/PROKKA_02062019.val -[11:29:28] Repairing broken .GBK output that tbl2asn produces... -[11:29:28] Running: sed 's/COORDINATES: profile/COORDINATES:profile/' < prokka\-out\/PROKKA_02062019\.gbf > prokka\-out\/PROKKA_02062019\.gbk -[11:29:28] Deleting unwanted file: prokka-out/PROKKA_02062019.gbf -[11:29:28] Output files: -[11:29:28] prokka-out/PROKKA_02062019.faa -[11:29:28] prokka-out/PROKKA_02062019.log -[11:29:28] prokka-out/PROKKA_02062019.tbl -[11:29:28] prokka-out/PROKKA_02062019.ffn -[11:29:28] prokka-out/PROKKA_02062019.err -[11:29:28] prokka-out/PROKKA_02062019.txt -[11:29:28] prokka-out/PROKKA_02062019.gff -[11:29:28] prokka-out/PROKKA_02062019.tsv -[11:29:28] prokka-out/PROKKA_02062019.sqn -[11:29:28] prokka-out/PROKKA_02062019.fna -[11:29:28] prokka-out/PROKKA_02062019.fsa -[11:29:28] prokka-out/PROKKA_02062019.gbk -[11:29:28] Annotation finished successfully. -[11:29:28] Walltime used: 0.03 minutes -[11:29:28] If you use this result please cite the Prokka paper: -[11:29:28] Seemann T (2014) Prokka: rapid prokaryotic genome annotation. Bioinformatics. 30(14):2068-9. -[11:29:28] Type 'prokka --citation' for more details. -[11:29:28] Thank you, come again.
--- a/test-data/prokka-out/PROKKA_02062019.sqn Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,145 +0,0 @@ -Seq-entry ::= set { - class genbank , - seq-set { - set { - class nuc-prot , - descr { - source { - org { - taxname "Listeria monocytogenes" , - orgname { - mod { - { - subtype strain , - subname "strain" } } , - gcode 11 } } } , - comment "Annotated using prokka 1.13.3 from - https://github.com/tseemann/prokka" , - user { - type - str "NcbiCleanup" , - data { - { - label - str "method" , - data - str "SeriousSeqEntryCleanup" } , - { - label - str "version" , - data - int 8 } , - { - label - str "month" , - data - int 2 } , - { - label - str "day" , - data - int 6 } , - { - label - str "year" , - data - int 2019 } } } , - create-date - std { - year 2019 , - month 2 , - day 6 } } , - seq-set { - seq { - id { - local - str "reference" } , - descr { - molinfo { - biomol genomic } } , - inst { - repr raw , - mol dna , - length 700 , - seq-data - iupacna "TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATT -GAAAAGGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTTCATTGCGCCAATG -TGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTTGTGAAAAATGATATTTCAACGGCTTTTGAAA -ACTAGAGATACAGAGTTTTATCGAGTTATACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGAT -AAACGGGAACCAGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTCAGTGAAT -ATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGAAGAACAGGAAATTCCAAAATTTACT -AAAGTTATTCGTTTTATAGAAGGACTTTTATCTTTTCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAG -GAAAAACTAATTTTTGAATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGGT -CGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATTTTCGAAAAATGGTTTTTCT -CTCTATAAAAATATGATATGA" } } , - seq { - id { - local - str "reference_1" } , - descr { - title "hypothetical protein KPFNHDNB_00001 [Listeria - monocytogenes]" , - molinfo { - biomol peptide , - tech concept-trans } } , - inst { - repr raw , - mol aa , - length 156 , - seq-data - ncbieaa "MIFQRLLKTRDTEFYRVIQNRNIDDVFGYLLIHDKREPAEIDDFKVFAKSNINKE -AFSVNIKKNHIYTMFFHFTDLEEEQEIPKFTKVIRFIEGLLSFQPETSHYVDNYLIKEKLIFEYPAEFEKIGEFAKYL -VKLSGRKITIPDTTREKYIYLTQ" } , - annot { - { - data - ftable { - { - id - local - id 2 , - data - prot { - name { - "hypothetical protein" } } , - location - int { - from 0 , - to 155 , - id - local - str "reference_1" } } } } } } } , - annot { - { - data - ftable { - { - id - local - id 1 , - data - cdregion { - frame one , - code { - id 11 } } , - product - whole - local - str "reference_1" , - location - int { - from 187 , - to 657 , - strand plus , - id - local - str "reference" } , - qual { - { - qual "inference" , - val "ab initio prediction:Prodigal:2.6" } } , - xref { - { - data - gene { - locus-tag "KPFNHDNB_00001" } } } } } } } } } }
--- a/test-data/prokka-out/PROKKA_02062019.tbl Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ ->Feature reference -188 658 CDS - inference ab initio prediction:Prodigal:2.6 - locus_tag KPFNHDNB_00001 - product hypothetical protein
--- a/test-data/prokka-out/PROKKA_02062019.tsv Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -locus_tag ftype length_bp gene EC_number COG product -KPFNHDNB_00001 CDS 471 hypothetical protein
--- a/test-data/prokka-out/PROKKA_02062019.txt Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -organism: Listeria monocytogenes strain -contigs: 1 -bases: 700 -CDS: 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ref.fna Tue Mar 12 17:37:29 2019 -0400 @@ -0,0 +1,11 @@ +>reference +TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAAGGTTATCCAC +AATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTTCATTGCGCCAATGTGTTAAA +CTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTTGTGAAAAATGATATTTCAACGGCTTTTGAA +AACTAGAGATACAGAGTTTTATCGAGTTATACAAAACAGGAATATTGACGACGTATTTGGATACTTATTA +ATTCACGATAAACGGGAACCAGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAG +AAGCTTTTTCAGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA +AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTTTCAGCCAGAA +ACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGAATATCCTGCTGAATTTGAGA +AAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGGTCGTAAAATTACTATTCCAGACACAACGAG +AGAAAAATATATCTATTTAACGCAATAATTTTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/reference.fasta.fai Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -reference 700 11 70 71
--- a/test-data/reference.gbk Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ -LOCUS reference 700 bp DNA linear 06-FEB-2019 -DEFINITION Listeria monocytogenes strain strain. -ACCESSION -VERSION -KEYWORDS . -SOURCE Listeria monocytogenes - ORGANISM Listeria monocytogenes - Unclassified. -COMMENT Annotated using prokka 1.13.3 from - https://github.com/tseemann/prokka. -FEATURES Location/Qualifiers - source 1..700 - /organism="Listeria monocytogenes" - /mol_type="genomic DNA" - /strain="strain" - CDS 188..658 - /locus_tag="KPFNHDNB_00001" - /inference="ab initio prediction:Prodigal:2.6" - /codon_start=1 - /transl_table=11 - /product="hypothetical protein" - /translation="MIFQRLLKTRDTEFYRVIQNRNIDDVFGYLLIHDKREPAEIDDF - KVFAKSNINKEAFSVNIKKNHIYTMFFHFTDLEEEQEIPKFTKVIRFIEGLLSFQPET - SHYVDNYLIKEKLIFEYPAEFEKIGEFAKYLVKLSGRKITIPDTTREKYIYLTQ" -ORIGIN - 1 tccacaagcc attgtgtgta attaaccact aattgtgtat aagtttaaac taattgaaaa - 61 ggttatccac aataaaaagg cgttattcag gagttatcca cactttctag gaaaggattt - 121 cattgcgcca atgtgttaaa ctatttaccg aatacgaaaa aaagacaaat aaatgaggtt - 181 gtgaaaaatg atatttcaac ggcttttgaa aactagagat acagagtttt atcgagttat - 241 acaaaacagg aatattgacg acgtatttgg atacttatta attcacgata aacgggaacc - 301 agcagaaatt gacgatttta aggtatttgc aaaaagtaat ataaataaag aagctttttc - 361 agtgaatatc aaaaaaaatc atatttacac gatgtttttc cactttactg atttagagga - 421 agaacaggaa attccaaaat ttactaaagt tattcgtttt atagaaggac ttttatcttt - 481 tcagccagaa acaagccatt acgttgataa ctatttaata aaggaaaaac taatttttga - 541 atatcctgct gaatttgaga aaatcgggga gtttgctaaa tatttagtaa agctttcggg - 601 tcgtaaaatt actattccag acacaacgag agaaaaatat atctatttaa cgcaataatt - 661 ttcgaaaaat ggtttttctc tctataaaaa tatgatatga -//
--- a/test-data/snippy-core-out/core.aln Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ ->Reference -AA ->a -AA ->b -TA ->c -AC
--- a/test-data/snippy-core-out/core.full.aln Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ ->Reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAA -GGTTATCCACAATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTT -CATTGCGCCAATGTGTTAAACTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTT -GTGAAAAATGATATTTCAACGGCTTTTGAAAACTAGAGATACAGAGTTTTATCGAGTTAT -ACAAAACAGGAATATTGACGACGTATTTGGATACTTATTAATTCACGATAAACGGGAACC -AGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAGAAGCTTTTTC -AGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTT -TCAGCCAGAAACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGA -ATATCCTGCTGAATTTGAGAAAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGG -TCGTAAAATTACTATTCCAGACACAACGAGAGAAAAATATATCTATTTAACGCAATAATT -TTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA ->a -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACT--------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ----------------------------------------- ->b -TCCtCAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACT--------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ----------------------------------------- ->c -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAcACT--------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -----------------------------------------
--- a/test-data/snippy-core-out/core.ref.fa Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ ->reference -TCCACAAGCCATTGTGTGTAATTAACCACTAATTGTGTATAAGTTTAAACTAATTGAAAAGGTTATCCAC -AATAAAAAGGCGTTATTCAGGAGTTATCCACACTTTCTAGGAAAGGATTTCATTGCGCCAATGTGTTAAA -CTATTTACCGAATACGAAAAAAAGACAAATAAATGAGGTTGTGAAAAATGATATTTCAACGGCTTTTGAA -AACTAGAGATACAGAGTTTTATCGAGTTATACAAAACAGGAATATTGACGACGTATTTGGATACTTATTA -ATTCACGATAAACGGGAACCAGCAGAAATTGACGATTTTAAGGTATTTGCAAAAAGTAATATAAATAAAG -AAGCTTTTTCAGTGAATATCAAAAAAAATCATATTTACACGATGTTTTTCCACTTTACTGATTTAGAGGA -AGAACAGGAAATTCCAAAATTTACTAAAGTTATTCGTTTTATAGAAGGACTTTTATCTTTTCAGCCAGAA -ACAAGCCATTACGTTGATAACTATTTAATAAAGGAAAAACTAATTTTTGAATATCCTGCTGAATTTGAGA -AAATCGGGGAGTTTGCTAAATATTTAGTAAAGCTTTCGGGTCGTAAAATTACTATTCCAGACACAACGAG -AGAAAAATATATCTATTTAACGCAATAATTTTCGAAAAATGGTTTTTCTCTCTATAAAAATATGATATGA
--- a/test-data/snippy-core-out/core.tab Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -CHR POS REF a b c -reference 4 A A T A -reference 48 A A A C
--- a/test-data/snippy-core-out/core.txt Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -ID LENGTH ALIGNED UNALIGNED VARIANT HET MASKED LOWCOV -a 700 51 649 0 0 0 0 -b 700 51 649 1 0 0 0 -c 700 51 649 1 0 0 0 -Reference 700 700 0 0 0 0 0
--- a/test-data/snippy-core-out/core.vcf Fri Mar 08 20:46:56 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -##fileformat=VCFv4.2 -##commandLine="snippy-core --ref reference.fasta a b c" -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##INFO=<ID=TYPE,Number=A,Type=String,Description="Allele type: snp ins del"> -##contig=<ID=reference,len=700> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT a b c -reference 4 . A T . PASS TYPE=snp GT 0 1 0 -reference 48 . A C . PASS TYPE=snp GT 0 0 1