Mercurial > repos > iuc > khmer
changeset 0:0187f18785a3 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit 37727831a2630b7a7d4fb033366cbd772c3086c8
line wrap: on
line diff
<!-- File: abundance-dist-single.xml -->
<!-- Galaxy wrapper for khmer's abundance-dist-single.py. Shared pieces
     (requirements, stdio, inputs, citations) are expanded from macros.xml. -->
<tool id="gedlab-khmer-abundance-dist-single" name="Abundance Distribution (all-in-one)" version="@WRAPPER_VERSION@-5">
    <description>
        Calculate abundance distribution of the k-mers in a given sequence file
    </description>
    <macros>
        <token name="@BINARY@">abundance-dist-single.py</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version" />
    <command><![CDATA[
set -xu &&
mkdir output && cd output &&
@BINARY@
@TABLEPARAMS@
${zero}
${bigcount}
#if $save_countgraph
    --savegraph=${optional_output_countgraph}
#end if
--squash
@THREADS@
${input_sequence_filename}
${output_histogram_filename}
]]>
    </command>
    <inputs>
        <expand macro="input_sequence_filename" />
        <param name="save_countgraph"
               type="boolean"
               label="Save the k-mer countgraph to a file"
               help="(--savegraph)" />
        <expand macro="input_zero" />
        <expand macro="input_bigcount" />
        <expand macro="tableinputs" />
    </inputs>
    <outputs>
        <!-- Only produced when the save_countgraph checkbox is ticked. -->
        <data name="optional_output_countgraph"
              format="oxlicg"
              label="${tool.name} k-mer countgraph">
            <filter>save_countgraph == True</filter>
        </data>
        <expand macro="abundance-histogram-output" />
    </outputs>
    <tests>
        <!-- The boolean defined by the input_zero macro is named "zero";
             the tests previously set a nonexistent "no_zero" parameter. -->
        <test>
            <param name="input_sequence_filename" value="test-abund-read-2.fa" />
            <param name="type" value="specific" />
            <param name="tablesize_specific" value="1e7" />
            <param name="n_tables" value="2" />
            <param name="ksize" value="17" />
            <param name="zero" value="false" />
            <output name="output_histogram_filename">
                <assert_contents>
                    <has_text text="1,96,96,0.98" />
                    <has_text text="1001,2,98,1.0" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="input_sequence_filename" value="test-abund-read-2.fa" />
            <param name="type" value="specific" />
            <param name="tablesize_specific" value="1e7" />
            <param name="n_tables" value="2" />
            <param name="ksize" value="17" />
            <param name="zero" value="false" />
            <param name="bigcount" value="false" />
            <output name="output_histogram_filename">
                <assert_contents>
                    <has_text text="1,96,96,0.98" />
                    <has_text text="255,2,98,1.0" />
                </assert_contents>
            </output>
        </test>

    </tests>
    <help><![CDATA[
Calculate the abundance distribution of k-mers from a single sequence file.

Note that with `-b` this script is constant memory; in exchange,
k-mer counts will stop at 255. The memory usage of this script with
`-b` will be about 1.15x the product of the `-x` and
`-N` numbers.


@HELP_FOOTER@
]]>
    </help>
    <citations>
        <expand macro="software-citation" />
        <expand macro="counting-citation" />
    </citations>
</tool>
<!-- File: abundance-dist.xml -->
<!-- Galaxy wrapper for khmer's abundance-dist.py: builds a k-mer abundance
     histogram for one sequence file from an existing countgraph. -->
<tool id="gedlab-khmer-abundance-dist"
      name="Abundance Distribution"
      version="@WRAPPER_VERSION@-4">
    <description>
        Calculate abundance distribution of the k-mers in a given sequence file using a pre-made k-mer countgraph
    </description>
    <macros>
        <token name="@BINARY@">abundance-dist.py</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version"/>
    <command><![CDATA[
set -xu &&
mkdir output && cd output &&
@BINARY@
--squash
${zero}
${bigcount}
${input_countgraph_filename}
${input_sequence_filename}
${output_histogram_filename}
]]>
    </command>
    <inputs>
        <expand macro="input_countgraph_filename"/>
        <expand macro="input_sequence_filename"/>
        <expand macro="input_zero"/>
        <expand macro="input_bigcount"/>
    </inputs>
    <outputs>
        <expand macro="abundance-histogram-output"/>
    </outputs>
    <tests>
        <!-- Default bigcount: the histogram reaches abundance 1001. -->
        <test>
            <param name="input_sequence_filename"
                   value="test-abund-read-2.fa"/>
            <param name="input_countgraph_filename"
                   value="test-abund-read-2.oxlicg"
                   ftype="oxlicg"/>
            <param name="zero" value="false"/>
            <output name="output_histogram_filename">
                <assert_contents>
                    <has_text text="1,96,96,0.98"/>
                    <has_text text="1001,2,98,1.0"/>
                </assert_contents>
            </output>
        </test>
        <!-- bigcount disabled: the top bin is expected at 255. -->
        <test>
            <param name="input_sequence_filename"
                   value="test-abund-read-2.fa"/>
            <param name="input_countgraph_filename"
                   value="test-abund-read-2.oxlicg"
                   ftype="oxlicg"/>
            <param name="zero" value="false"/>
            <param name="bigcount" value="false"/>
            <output name="output_histogram_filename">
                <assert_contents>
                    <has_text text="1,96,96,0.98"/>
                    <has_text text="255,2,98,1.0"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Calculate abundance distribution of the k-mers in the sequence file using a
pre-made k-mer countgraph.

@HELP_FOOTER@
]]>
    </help>
    <citations>
        <expand macro="software-citation"/>
        <expand macro="counting-citation"/>
    </citations>
</tool>
<!-- File: count-median.xml -->
<!-- Galaxy wrapper for khmer's count-median.py. Shared pieces are expanded
     from macros.xml. -->
<tool id="gedlab-khmer-count-median" name="Count Median" version="@WRAPPER_VERSION@-1">
    <description>Count the median/avg k-mer abundance for each sequence in the input file</description>
    <macros>
        <token name="@BINARY@">count-median.py</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version" />
    <command><![CDATA[
set -xu &&
@BINARY@
$input_countgraph_filename
$input_sequence_filename
$output_summary_filename
]]>
    </command>
    <inputs>
        <expand macro="input_sequence_filename" />
        <expand macro="input_countgraph_filename" />
    </inputs>
    <outputs>
        <data name="output_summary_filename" format="txt"
              label="${input_sequence_filename} sequence id, median, average, stddev, and seq length" />
    </outputs>
    <tests>
        <!-- The expected rows are comma-separated, which is why the help
             text below describes the output as comma-separated. -->
        <test>
            <param name="input_sequence_filename" value="test-abund-read-2.fa" />
            <param name="input_countgraph_filename" value="test-abund-read-2.oxlicg" ftype="oxlicg" />
            <output name="output_summary_filename">
                <assert_contents>
                    <has_text text="seq,1001,1001.0,0.0,18" />
                    <has_text text="895:1:37:17593:9954/1,1,21.408163071,141.391921997,114" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Count the median/avg k-mer abundance for each sequence in the input file,
based on the k-mer counts in the given k-mer countgraph. Can be used to
estimate expression levels (mRNAseq) or coverage (genomic/metagenomic). The
output file contains sequence id, median, average, stddev, and seq length;
fields are separated by commas. For khmer 1.x count-median.py will split
sequence names at the first space which means that some sequence formats (e.g.
paired FASTQ in Casava 1.8 format) will yield uninformative names. Use
`--csv` to fix this behavior.

@HELP_FOOTER@
]]>
    </help>
    <citations>
        <expand macro="software-citation" />
        <expand macro="diginorm-citation" />
    </citations>
</tool>
<!-- File: do-partition.xml -->
<!-- Galaxy wrapper for khmer's do-partition.py. Inputs are symlinked to
     sequence-N in the job directory, the script runs inside ./output, and
     everything left in ./output is collected as a list collection. -->
<tool id="gedlab-khmer-do-partition" name="Sequence partition all-in-one" version="@WRAPPER_VERSION@-4">
    <description>Load, partition, and annotate FAST[AQ] sequences</description>
    <macros>
        <token name="@BINARY@">do-partition.py</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version" />
    <command><![CDATA[
set -xu &&
#for $num, $input in enumerate($inputs)
ln -s ${input} sequence-${num} &&
#end for
mkdir -p output && cd output &&
@BINARY@
@TABLEPARAMS@
@THREADS@
output
../sequence-*
&&
mv output.info $information
]]>
    </command>
    <inputs>
        <expand macro="input_sequences_filenames" />
        <expand macro="tableinputs" />
    </inputs>
    <outputs>
        <data name="information" format="txt" label="${tool.name} summary." />
        <collection name="annotated_sequence_files" type="list">
            <discover_datasets pattern="__name__" directory="output" />
        </collection>
    </outputs>
    <tests>
        <test>
            <param name="inputs" value="random-20-a.fa"/>
            <output_collection name="annotated_sequence_files" type="list">
                <element name="sequence-0.part" file="random-20-a.fa.part" />
            </output_collection>
        </test>
    </tests>
    <!-- The help now ends with the shared footer token, as in the other
         khmer wrappers. -->
    <help><![CDATA[
Load in a set of sequences, partition them, merge the partitions, and
annotate the original sequences files with the partition information.

This script combines the functionality of `load-graph.py`,
`partition-graph.py`, `merge-partitions.py`, and
`annotate-partitions.py` into one script. This is convenient
but should probably not be used for large data sets, because
`do-partition.py` doesn't provide save/resume functionality.

@HELP_FOOTER@
]]>
    </help>
    <citations>
        <expand macro="software-citation" />
        <expand macro="graph-citation" />
    </citations>
</tool>
<!-- File: extract-partitions.xml -->
<!-- Galaxy wrapper for khmer's extract-partitions.py. The script runs inside
     ./output with the prefix "output"; output.dist is moved to the
     distribution dataset and the remaining files are collected as a list. -->
<tool id="gedlab-khmer-extract-partitions"
      name="Extract partitions"
      version="@WRAPPER_VERSION@-2">
    <description>Separate sequences that are annotated with partitions into grouped files</description>
    <macros>
        <token name="@BINARY@">extract-partitions.py</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version"/>
    <command><![CDATA[
set -xu &&
mkdir -p output &&
cd output &&
@BINARY@
--max-size $max_size
--min-partition-size $min_partition_size
$output_unassigned
output
#for input in $inputs
$input
#end for
&&
mv output.dist $distribution
]]>
    </command>
    <inputs>
        <expand macro="input_sequences_filenames"/>
        <param name="max_size"
               type="integer"
               label="Max group size"
               value="1000000"
               help="No more than this many number of sequences will be stored in each output (--max-size/-X)"/>
        <param name="min_partition_size"
               type="integer"
               label="Min partition size"
               value="5"
               help="The minimum partition size worth keeping (--min-partition-size/-m)"/>
        <param name="output_unassigned"
               type="boolean"
               checked="false"
               truevalue="--output-unassigned"
               falsevalue=""
               label="Output unassigned sequences"
               help="(--output-unassigned/-U)"/>
    </inputs>
    <outputs>
        <data name="distribution"
              format="txt"
              label="Partition size distribution from ${tool.name}"/>
        <collection name="groups-of-partitions" type="list">
            <discover_datasets pattern="__name__" directory="output"/>
        </collection>
    </outputs>
    <tests>
        <test>
            <param name="inputs" value="random-20-a.fa.part"/>
            <output_collection name="groups-of-partitions">
                <element name="output.group0000.part"
                         file="random-20-a.part.extract.fa"/>
            </output_collection>
        </test>

    </tests>
    <help><![CDATA[
Separate sequences that are annotated with partitions into grouped files.

@HELP_FOOTER@
]]>
    </help>
    <citations>
        <expand macro="software-citation"/>
        <expand macro="graph-citation"/>
    </citations>
</tool>
<!-- File: filter-abund.xml -->
<!-- Galaxy wrapper for khmer's filter-abund.py. Inputs are symlinked to
     filter-abund-sequence-N, the script runs inside ./output, and the files
     left there are collected as a list collection. -->
<tool id="gedlab-khmer-filter-abund" name="Filter by abundance" version="@WRAPPER_VERSION@-5">
    <description>
        Trims fastq/fasta sequences at k-mers of a given abundance
        based on a provided k-mer countgraph
    </description>
    <macros>
        <token name="@BINARY@">filter-abund.py</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version" />
    <command><![CDATA[
set -xu &&
#for $num, $input in enumerate($inputs)
ln -s ${input} filter-abund-sequence-${num} &&
#end for
mkdir output && cd output &&
@BINARY@
--cutoff=${cutoff}
${variable_coverage}
@THREADS@
${input_countgraph_filename}
../filter-abund-sequence*
]]>
    </command>
    <inputs>
        <expand macro="input_sequences_filenames" />
        <!-- The help names the flag exactly as truevalue emits it
             (hyphenated, not with an underscore). -->
        <param name="variable_coverage" type="boolean" checked="false" truevalue="--variable-coverage" falsevalue=""
               label="Variable coverage"
               help="Only trim when a sequence has high enough coverage; median abundance &gt; 20 (--variable-coverage)" />
        <param name="cutoff" type="integer" value="2" label="Cutoff"
               help="Trim at k-mers below this abundance. (--cutoff)" />
        <expand macro="input_countgraph_filename" />
    </inputs>
    <outputs>
        <collection name="filter-abund-sequences" type="list">
            <discover_datasets pattern="__name__" directory="output" />
        </collection>
    </outputs>
    <tests>
        <test>
            <param name="inputs" value="test-abund-read-2.fa" />
            <param name="input_countgraph_filename"
                   value="test-abund-read-2.oxlicg" ftype="oxlicg" />
            <output_collection name="filter-abund-sequences" type="list">
                <element name="filter-abund-sequence-0.abundfilt">
                    <assert_contents>
                        <has_text text="GGTTGACGGGGCTCAGGG" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <test>
            <param name="inputs" value="test-abund-read-2.fa" />
            <param name="input_countgraph_filename"
                   value="test-abund-read-2.oxlicg" ftype="oxlicg" />
            <param name="cutoff" value="1" />
            <output_collection name="filter-abund-sequences" type="list">
                <element name="filter-abund-sequence-0.abundfilt">
                    <assert_contents>
                        <has_text text="GGTTGACGGGGCTCAGGG" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
Trim sequences at a minimum k-mer abundance.

If the input sequences are from RNAseq or metagenome sequencing then
`--variable-coverage` should be used.

@HELP_FOOTER@
]]>
    </help>
    <citations>
        <expand macro="software-citation" />
        <expand macro="counting-citation" />
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter-below-abund.py Sat Oct 17 04:02:33 2015 -0400 @@ -0,0 +1,87 @@ +#! /usr/bin/env python +# This file is part of khmer, https://github.com/dib-lab/khmer/, and is +# Copyright (C) 2011-2015, Michigan State University. +# Copyright (C) 2015, The Regents of the University of California. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# +# * Neither the name of the Michigan State University nor the names +# of its contributors may be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Contact: khmer-project@idyll.org +from __future__ import print_function +import sys +import os +import khmer +from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter + +WORKER_THREADS = 8 +GROUPSIZE = 100 + +CUTOFF = 50 + +### + + +def main(): + counting_ht = sys.argv[1] + infiles = sys.argv[2:] + + print('file with ht: %s' % counting_ht) + print('-- settings:') + print('N THREADS', WORKER_THREADS) + print('--') + + print('making hashtable') + ht = khmer.load_countgraph(counting_ht) + K = ht.ksize() + + for infile in infiles: + print('filtering', infile) + outfile = os.path.basename(infile) + '.below' + + outfp = open(outfile, 'w') + + def process_fn(record, ht=ht): + name = record['name'] + seq = record['sequence'] + if 'N' in seq: + return None, None + + trim_seq, trim_at = ht.trim_below_abundance(seq, CUTOFF) + + if trim_at >= K: + return name, trim_seq + + return None, None + + tsp = ThreadedSequenceProcessor(process_fn, WORKER_THREADS, GROUPSIZE) + + tsp.start(verbose_fasta_iter(infile), outfp) + +if __name__ == '__main__': + main()
<!-- File: filter-below-abund.xml -->
<!-- Galaxy wrapper for filter-below-abund.py. Inputs are symlinked to
     sequence-N, the script runs inside ./output, and the files left there
     are collected as a list collection. -->
<tool id="gedlab-khmer-filter-below-abund" name="Filter below abundance cutoff of 50" version="@WRAPPER_VERSION@-3">
    <description>
        Trims fastq/fasta sequences at k-mers with abundance below 50
        based on a provided k-mer countgraph
    </description>
    <macros>
        <token name="@BINARY@">filter-below-abund.py</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version" />
    <!-- The symlink loop is closed with "#end for" before the mkdir step;
         without it the Cheetah template has an unterminated #for block. -->
    <command><![CDATA[
set -xu &&
#for $num, $input in enumerate($inputs)
ln -s ${input} sequence-${num} &&
#end for
mkdir output && cd output &&
@BINARY@
${input_countgraph_filename}
../sequence*
]]>
    </command>
    <inputs>
        <expand macro="input_sequences_filenames" />
        <expand macro="input_countgraph_filename" />
    </inputs>
    <outputs>
        <collection name="sequence_files" type="list">
            <discover_datasets pattern="__name__" directory="output" />
        </collection>
    </outputs>
    <tests>
        <test>
            <param name="inputs" value="test-abund-read-2.fa" />
            <param name="input_countgraph_filename" value="test-abund-read-2.large.oxlicg" ftype="oxlicg" />
            <output_collection name="sequence_files">
                <element name="sequence-0.below" file="test-abund-read-2.fa.below" />
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
Trims sequences at the first kmer with abundance above 50.

@HELP_FOOTER@
]]></help>
    <citations>
        <expand macro="software-citation" />
        <expand macro="counting-citation" />
    </citations>
</tool>
<!-- File: macros.xml -->
<!-- Shared tokens and XML macros imported by every khmer tool wrapper. -->
<macros>
    <token name="@WRAPPER_VERSION@">2.0</token>
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@WRAPPER_VERSION@">khmer</requirement>
        </requirements>
    </xml>
    <xml name="version">
        <version_command>@BINARY@ --version</version_command>
    </xml>
    <xml name="stdio">
        <stdio>
            <exit_code range="1:" level="fatal" />
        </stdio>
    </xml>
    <!-- Table sizing options; reads the "parameters" conditional defined by
         the tableinputs macro below. -->
    <token name="@TABLEPARAMS@">
#if $parameters.type == "simple"
  --ksize=20
  --n_tables=4
  --max-tablesize=$parameters.tablesize
#else
  --ksize=$parameters.ksize
  --n_tables=$parameters.n_tables
  --max-tablesize="$parameters.tablesize_specific"
#end if
    </token>
    <token name="@THREADS@">--threads \${GALAXY_SLOTS:-4}</token>
    <xml name="tableinputs">
        <conditional name="parameters">
            <param name="type" type="select" label="Advanced Parameters"
                   help="ksize, n_tables, a specific tablesize" >
                <option value="simple" selected="true">Hide</option>
                <option value="specific">Show</option>
            </param>
            <when value="simple">
                <param name="tablesize" type="select" label="Sample Type" display="radio">
                    <option value="1e9" selected="true">Microbial Genome</option>
                    <option value="2e9">Animal Transcriptome</option>
                    <option value="4e9">Small Animal Genome or Low-Diversity Metagenome</option>
                    <option value="16e9">Large Animal Genome</option>
                </param>
            </when>
            <when value="specific">
                <param name="ksize" type="integer" value="20" label="k-mer size" help="k-mer size to use" />
                <param name="n_tables" type="integer" min="1" value="4" label="n_tables" help="number of tables to use" />
                <param name="tablesize_specific" type="text" value="1000000.0"
                       label="tablesize" help="(--max-tablesize) upper bound on the tablesize to use" />
            </when>
        </conditional>
    </xml>
    <xml name="input_sequences_filenames">
        <param name="inputs" multiple="true" type="data"
               format="fasta,fastq"
               label="Sequences in FASTA or FASTQ format"
               help="Put in order of precedence such as longest reads first." />
    </xml>
    <xml name="input_sequence_filename">
        <param name="input_sequence_filename" type="data" format="fasta,fastq"
               label="Sequence in FASTA or FASTQ format" />
    </xml>
    <xml name="input_countgraph_filename">
        <param name="input_countgraph_filename" type="data" format="oxlicg"
               label="the k-mer countgraph to query"
               help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer countgraph." />
    </xml>
    <xml name="abundance-histogram-output">
        <data name="output_histogram_filename" format="txt"
              label="${tool.name} k-mer abundance histogram. The
              columns are: (1) k-mer abundance, (2) k-mer count, (3)
              cumulative count, (4) fraction of total distinct k-mers." />
    </xml>
    <xml name="output_sequences">
        <data name="output" format_source="inputs"
              label="${tool.name} processed nucleotide sequence file">
            <discover_datasets pattern="__designation_and_ext__" directory="output" visible="true"/>
        </data>
    </xml>
    <xml name="output_sequences_single">
        <data name="output" format_source="input_sequence_filename"
              label="${tool.name} processed nucleotide sequence file" />
    </xml>
    <!-- Both booleans are "on" by default and emit a flag only when
         unchecked (truevalue is empty, falsevalue carries the flag). -->
    <xml name="input_zero">
        <param name="zero" type="boolean" truevalue="" falsevalue="--no-zero" checked="true"
               help="Output zero count bins (--no-zero)" />
    </xml>
    <xml name="input_bigcount">
        <param name="bigcount" type="boolean" truevalue="" falsevalue="--no-bigcount"
               checked="true" help="Count k-mers past 255 occurrences (--no-bigcount)" />
    </xml>
    <token name="@HELP_FOOTER@"><![CDATA[
(from the khmer project: http://khmer.readthedocs.org/en/v@WRAPPER_VERSION@/ )]]></token>
    <!-- BibTeX fields must be comma-separated; the comma after the author
         field was missing. -->
    <xml name="software-citation">
        <citation type="bibtex">@article{khmer2015,
     author = "Crusoe, Michael R. and Alameldin, Hussien F. and Awad, Sherine
  and Bucher, Elmar and Caldwell, Adam and Cartwright, Reed and Charbonneau,
  Amanda and Constantinides, Bede and Edvenson, Greg and Fay, Scott and Fenton,
  Jacob and Fenzl, Thomas and Fish, Jordan and Garcia-Gutierrez, Leonor and
  Garland, Phillip and Gluck, Jonathan and González, Iván and Guermond, Sarah
  and Guo, Jiarong and Gupta, Aditi and Herr, Joshua R. and Howe, Adina and
  Hyer, Alex and Härpfer, Andreas and Irber, Luiz and Kidd, Rhys and Lin, David
  and Lippi, Justin and Mansour, Tamer and McA'Nulty, Pamela and McDonald, Eric
  and Mizzi, Jessica and Murray, Kevin D. and Nahum, Joshua R. and Nanlohy,
  Kaben and Nederbragt, Alexander Johan and Ortiz-Zuazaga, Humberto and Ory,
  Jeramia and Pell, Jason and Pepe-Ranney, Charles and Russ, Zachary N and
  Schwarz, Erich and Scott, Camille and Seaman, Josiah and Sievert, Scott and
  Simpson, Jared and Skennerton, Connor T. and Spencer, James and Srinivasan,
  Ramakrishnan and Standage, Daniel and Stapleton, James A. and Stein, Joe and
  Steinman, Susan R and Taylor, Benjamin and Trimble, Will and Wiencko, Heather
  L. and Wright, Michael and Wyss, Brian and Zhang, Qingpeng and zyme, en and
  Brown, C. Titus",
     title = "The khmer software package: enabling efficient nucleotide
  sequence analysis",
     year = "2015",
     month = "08",
     publisher = "F1000",
     url = "http://dx.doi.org/10.12688/f1000research.6924.1"
  }</citation>
    </xml>
    <xml name="diginorm-citation">
        <citation type="bibtex">@unpublished{diginorm,
    author = "Brown, C Titus and Howe, Adina and Zhang, Qingpeng and Pyrkosz,
Alexis B and Brom, Timothy H",
    title = "A Reference-Free Algorithm for Computational Normalization of
Shotgun Sequencing Data",
    year = "2012",
    eprint = "arXiv:1203.4802",
    url = "http://arxiv.org/abs/1203.4802",
}</citation></xml>
    <xml name="graph-citation">
        <citation type="bibtex">@article{Pell2012,
  doi = {10.1073/pnas.1121464109},
  url = {http://dx.doi.org/10.1073/pnas.1121464109},
  year = {2012},
  month = {jul},
  publisher = {Proceedings of the National Academy of Sciences},
  volume = {109},
  number = {33},
  pages = {13272--13277},
  author = {J. Pell and A. Hintze and R. Canino-Koning and A. Howe and J. M. Tiedje and C. T. Brown},
  title = {Scaling metagenome sequence assembly with probabilistic de Bruijn graphs},
  journal = {Proceedings of the National Academy of Sciences}
  }</citation>
    </xml>
    <xml name="counting-citation">
        <citation type="doi">10.1371/journal.pone.0101271</citation>
    </xml>
</macros>
<!-- File: normalize-by-median.xml -->
<!-- Galaxy wrapper for khmer's normalize-by-median.py. Inputs are symlinked
     to sequence-N, the script runs inside ./output, and the files left there
     are collected as a list collection. -->
<tool id="gedlab-khmer-normalize-by-median" name="Normalize By Median" version="@WRAPPER_VERSION@-4">
    <description>Filters a fastq/fasta file using digital normalization via median k-mer abundances</description>
    <macros>
        <token name="@BINARY@">normalize-by-median.py</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version" />
    <!-- The script is invoked through the @BINARY@ token defined above, as
         in the other khmer wrappers, instead of repeating its name. -->
    <command><![CDATA[
set -xu &&
#for $num, $input in enumerate($inputs)
    ln -s ${input} sequence-${num} &&
#end for
mkdir output &&
cd output &&
@BINARY@
${paired_switch}
${force_single_switch}
@TABLEPARAMS@
--cutoff=${cutoff}
#if $unpaired_reads_filename
    --unpaired-reads=${unpaired_reads_filename}
#end if
#if $save_countgraph
    --savegraph=${countgraph}
#end if
#if $countgraph_to_load
    --loadgraph=${countgraph_to_load}
#end if
--report=${report}
../sequence-*
]]>
    </command>
    <inputs>
        <expand macro="input_sequences_filenames" />
        <param name="paired_switch" type="boolean" checked="false" truevalue="--paired" falsevalue=""
               label="Require all sequences be properly paired?"
               help="(--paired) The tool will fail if given improperly paired reads and this option is selected." />
        <!-- The help names the flag this switch actually emits. -->
        <param name="force_single_switch" type="boolean" checked="false" truevalue="--force_single" falsevalue=""
               label="Ignore all pairing information?"
               help="(--force_single) By default this tool process reads in a pair-aware manner. This option disables that behavior." />
        <param name="unpaired_reads_filename" type="data" format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" optional="true"
               label="Extra unpaired reads"
               help="(--unpaired-reads) If all but one of your sequence files are interleaved paired end reads you can include one unpaired file to be processed last without regard to pairing." />
        <param name="countgraph_to_load" type="data" format="oxlicg" optional="true"
               label="Optional k-mer countgraph"
               help="(--loadgraph) The inputs file(s) will be processed using the kmer counts in the specified k-mer countgraph file as a starting point." />
        <param name="save_countgraph" type="boolean" label="Save the k-mer countgraph(s) in a file" help="(--savegraph)" />
        <param name="cutoff" type="integer" min="1" value="20" label="Cutoff" help="(--cutoff)" />
        <expand macro="tableinputs" />
    </inputs>
    <outputs>
        <data name="countgraph" format="oxlicg" label="${tool.name} k-mer countgraph">
            <filter>save_countgraph == True</filter>
        </data>
        <data name="report" format="txt" label="${tool.name} report" />
        <collection name="sequences" type="list">
            <discover_datasets pattern="__name__" directory="output" />
        </collection>
    </outputs>
    <tests>
        <test>
            <param name="inputs" value="test-abund-read-2.fa"/>
            <param name="type" value="specific" />
            <param name="cutoff" value="1" />
            <param name="ksize" value="17" />
            <output name="report" file="normalize-by-median.report.txt" />
            <output_collection name="sequences" type="list">
                <element name="sequence-0.keep">
                    <assert_contents>
                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <test>
            <param name="inputs" value="test-abund-read-2.fa" />
            <param name="type" value="specific" />
            <param name="cutoff" value="2" />
            <param name="ksize" value="17" />
            <output name="report" file="normalize-by-median.c2.report.txt" />
            <output_collection name="sequences" type="list">
                <element name="sequence-0.keep">
                    <assert_contents>
                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
                        <has_text text="GGTTGACGGGGCTCAGGG" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- Paired-mode test: the input is named paired_switch; the test
             previously set a nonexistent "paired" parameter.
             NOTE(review): assumes test-abund-read-paired.fa is properly
             interleaved so the paired flag does not abort the run; confirm. -->
        <test>
            <param name="inputs" value="test-abund-read-paired.fa" />
            <param name="type" value="specific" />
            <param name="cutoff" value="1" />
            <param name="ksize" value="17" />
            <param name="paired_switch" value="true" />
            <output name="report" file="normalize-by-median.paired.report.txt" />
            <output_collection name="sequences" type="list">
                <element name="sequence-0.keep">
                    <assert_contents>
                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
                        <has_text text="GGTTGACGGGGCTCAGGG" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
Do digital normalization (remove mostly redundant sequences)

Discard sequences based on whether or not their median k-mer abundance lies
above a specified cutoff. Kept sequences will be placed in <fileN>.keep.

By default, Paired end reads will be considered together; if either read will
be kept, then both will be kept. (This keeps both reads from a fragment, and
helps with retention of repeats.) Unpaired reads are treated individually.

If `--paired` is set then proper pairing is required and the tool will exit on
unpaired reads, although `--unpaired-reads` can be used to supply a file of
orphan reads to be read after the paired reads.

`--force_single` will ignore all pairing information and treat reads
individually.

With `-s`/`--savegraph`, the k-mer countgraph will be saved to the specified
file after all sequences have been processed. `--loadgraph` will load the
specified k-mer countgraph before processing the specified files. Note
that the countgraph is in same format as those produced by
`load-into-counting.py` and consumed by `abundance-dist.py`.

@HELP_FOOTER@
]]>
    </help>
    <citations>
        <expand macro="software-citation" />
        <expand macro="diginorm-citation" />
    </citations>
</tool>
<?xml version="1.0"?>
<!-- File: repository_dependencies.xml -->
<!-- Tool Shed repositories that must be installed alongside these wrappers.
     NOTE(review): the description also mentions the oxli datatype
     definitions, but only the khmer package repository is listed; confirm
     whether a datatypes repository entry is missing. -->
<repositories description="We require the khmer package and the oxli datatype definitions.">
    <repository
        name="package_khmer_2_0"
        owner="iuc"
        changeset_revision="be6719477611"
        toolshed="https://testtoolshed.g2.bx.psu.edu"/>
</repositories>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/normalize-by-median.c2.report.txt Sat Oct 17 04:02:33 2015 -0400 @@ -0,0 +1,2 @@ +total,kept,f_kept +1001,2,0.001998
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/normalize-by-median.paired.report.txt Sat Oct 17 04:02:33 2015 -0400 @@ -0,0 +1,2 @@ +total,kept,f_kept +6,2,0.3333
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/normalize-by-median.report.txt Sat Oct 17 04:02:33 2015 -0400 @@ -0,0 +1,2 @@ +total,kept,f_kept +1001,1,0.000999
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/random-20-a.fa Sat Oct 17 04:02:33 2015 -0400 @@ -0,0 +1,198 @@ +>35 +CGCAGGCTGGATTCTAGAGGCAGAGGTGAGCTATAAGATATTGCATACGTTGAGCCAGC +>16 +CGGAAGCCCAATGAGTTGTCAGAGTCACCTCCACCCCGGGCCCTGTTAGCTACGTCCGT +>46 +GGTCGTGTTGGGTTAACAAAGGATCCCTGACTCGATCCAGCTGGGTAGGGTAACTATGT +>40 +GGCTGAAGGAGCGGGCGTACGTGTTTACGGCATGATGGCCGGTGATTATGGGGGACGGG +>33 +GCAGCGGCTTTGAATGCCGAATATATAACAGCGACGGGGTTCAATAAGCTGCACATGCG +>98 +ACCAGATGCATAGCCCAACAGCTGAGACATTCCCAGCTCGCGAACCAAGACGTGAGAGC +>17 +CCCTGTTAGCTACGTCCGTCTAAGGATATTAACATAGTTGCGACTGCGTCCTGTGCTCA +>89 +GCGAGATACTAGCAAAGGTTCATCAACAGCTACACCCGACGAACCCCGAGAAATTGGGA +>30 +GTTATGGTCCAGGATGAATGCGCGTACCGGGCGCCTATCACTCCTCTTGTCATTCAGAA +>82 +ATGCACTATATTTAAGAGGTCTAGAGTGTAAAAAGTGTACCCTTCGGGGTGGAGCTGTT +>60 +GTTTTTGTCATCGTGCATAAAGCGGGACAGAGTTCAACGGTATTCGAATGCACACCCTA +>83 +CCTTCGGGGTGGAGCTGTTAATGAACTCAAGTGGCGATGGAGGCTAAAACGATACGTTG +>12 +AGCCAATTGTAACCATATGGTATCCAGTTTCCGTAGCAGCAATGCGCGACGGGCAATCG +>85 +CGTGATATGATTACTAAAGGGGCCCGCAAAAACCCATTCACTGAGGGCTCTGTCCGTAC +>2 +CCCGTGGGGCGGGCTAATTTTAAAGGCAGGTTGCTACACGTCAACTCTACCCAAGCTCC +>45 +ATACGCCACTCGACTTGGCTCGCCCTCGATCTAAAATAGCGGTCGTGTTGGGTTAACAA +>11 +GCAGCAGACCAACATCCAACACTTTTCACAAGAGGCTGACAGCCAATTGTAACCATATG +>39 +CAATTGACTTCCATGTGGGTCGGCTGTCAAGTCTAAACCGGGCTGAAGGAGCGGGCGTA +>26 +AACATCTTAACCTCTGATCCCAACATGAGGGACATGAGTTTTCAAAGTAACGATGCGCA +>75 +GTCGGTGCCCGCGTGCGGAGCAGTCTTGATCCGGCGCGCTCTTACCTATGGTCGGCACG +>81 +GGCTACTGGTTGATAAGCGTACGTAAAAGGCGAGTCTTACATGCACTATATTTAAGAGG +>97 +ATTAGTGTGACTAGCCGAGTGCCCCAGCGTTTATCCAATGACCAGATGCATAGCCCAAC +>13 +AATGCGCGACGGGCAATCGCGTCTGCGTTGATCGTCGCCCCTATTGTCGCTCCCTTAGT +>92 +ATCAGGGCAAATTTGCTCGTGACTAAATGGTAATACTACCCGGGACAGTAAACTTTTGG +>56 +AGATCTGCTTGGGTGTATCCCCATTCAGAGATACCAGATCTAAGCGACCATCAGAAACA +>61 +TATTCGAATGCACACCCTAACATACTGGAAGATTCACTCTATATACCGGGAACTACTAA +>96 +ATTAGACCGCTATCAACTCTTGCGAGGAAGGTCTGGGCCTATTAGTGTGACTAGCCGAG +>31 +CTCCTCTTGTCATTCAGAAGGAATTTGATTAATTACCTGGGCTGACTCGCGCCCCCTGC +>29 
+TGGAAGCGCCCTCCGCTCAGGCGTTTTAGTAGATCCCAGTGTTATGGTCCAGGATGAAT +>54 +TGGATGAGGTCCTTAAGGCCTAATTGACCAATCGCCCCAAGATTGGTGGTGAATGACTC +>0 +TAGTGATCAGCGGCTAGTGTCGCCCCTCTTAGCACCTTGCGATCATCGAATCGGGCTGT +>90 +GAACCCCGAGAAATTGGGAAGCCTGGAGGCAGTACAGTCATCCAGTCTGCTGCTCAAAG +>34 +TCAATAAGCTGCACATGCGTGGTTGTGGCACGATCAGTTCCGCAGGCTGGATTCTAGAG +>43 +AGGACTCGACGTCCGCCCCATGCTTGAGAGAAGGTTTCGGCCAACCATGGTAGGTTAGG +>8 +ACACACAAGGCCAGACACCAACTTGGCCGTGGAATTTATCAACACTTCTGAGACGAAGG +>37 +TGTGCGCTGTGAGATACAACTATAGGCACCGGGTTGCTGGCTAATAACCATTTAGAGTC +>51 +ACACAATGGACGCGTTAAGGAGAACCGGTCGCAACCAGGTTGAAAATGCCTGATATACG +>32 +GCTGACTCGCGCCCCCTGCAGGCTGCTATGATTGAGTGCGGCAGCGGCTTTGAATGCCG +>78 +TCTGGGGCGAGATCCCCTCTGCTCACTTTCTTGTAGTAAATACACCGAAGGGGCGAACC +>18 +CGACTGCGTCCTGTGCTCAGTTCGTGACGCCGAACTCAAGGACGCGGTACGAAGAACTG +>36 +TTGCATACGTTGAGCCAGCGCCGCCCGTATACACAGGGTCTGTGCGCTGTGAGATACAA +>53 +ATATAAGTTTTTTAGATGTAAAAAATTTTTTATGGCGGCCTGGATGAGGTCCTTAAGGC +>24 +AAGAAACAGGCTAGGTCTTCCATGCAATGGTTCTCACAGTGTAGTCGCGCATCAACTCC +>7 +AAACGTCTAAGTAATCATGCGACCGGCGCCTCGATTGGACACACACAAGGCCAGACACC +>9 +AACACTTCTGAGACGAAGGTCATTTACGATTGGGACACTTTCTCGAACTCCGGTTAATT +>47 +CTGGGTAGGGTAACTATGTAGCCATCGCTCAGTGGATTCTTCCGGGATAGGGTGTGCGA +>62 +ATATACCGGGAACTACTAAAATTTTGGGCTACTCTATGCTTACAGCCCAACATGCGCAA +>79 +TACACCGAAGGGGCGAACCCTGTCTACATTCGCAAATGCATCCTACCTGAGAGGCTTCG +>48 +TCCGGGATAGGGTGTGCGAATGTGCCGGGCATTCAGCTCCTTAGAGACGAGTTACGAGC +>66 +GGCGCGACCAATATTCATTTGATGAGAATTGAAATCGACTGAATCACGGGATTTATACA +>25 +GTAGTCGCGCATCAACTCCGCCAGTTTTATCGAAGCGCCCAACATCTTAACCTCTGATC +>5 +TCATTACGGGGTGTCCATCTAGAGAAAGTGGGTTTCCCTTATAGAAATGAGGAGGATTC +>72 +ATAAAAAACGACTTCTAAAGCGACACTGGTTTTATCCTTCCCTGTTTTCCTCGCCCCAT +>76 +CTTACCTATGGTCGGCACGATTCCATTGGCGGATATAGGATTGATTACGTGTGTTTACT +>69 +GCAGCGAGGTATTTAAACTGTTCAATCGGCGCAACCGAAAATCTGCTACCGTGGTTGCT +>87 +CAGTATACGCCCGTTGAGAAACAGGTGGTGGCGCAGTGTCGATTACTTCGTAATAATTT +>27 +TTCAAAGTAACGATGCGCAGATTGAATAATGCCATATCTGCGCGAGAGGTTTCAGGTAC +>77 +TTGATTACGTGTGTTTACTATACCGGTAGAAGCCTTCAGTTCTGGGGCGAGATCCCCTC +>95 
+TACGTGTGGCATCGTTGCACCCTAATTCGCATTATTAAGTATTAGACCGCTATCAACTC +>63 +TACAGCCCAACATGCGCAACAACTATAAGCTGCTGCTGACAGATCCGTTTGTTCCGGAC +>38 +CTAATAACCATTTAGAGTCGCCCGCGGTGATGAGTAATCGCAATTGACTTCCATGTGGG +>20 +GTGCCTACCGTACCTGTCGAGCCAGTGCGATCAGTAAAACTACCGATTCGTGGCCTCCC +>88 +GATTACTTCGTAATAATTTGAGGGTGCTGCCGCGTGTTCCGCGAGATACTAGCAAAGGT +>49 +TTAGAGACGAGTTACGAGCCACTCTTGGATCGTCATGCATACCTCGCAGATCGGCAGAG +>91 +TCCAGTCTGCTGCTCAAAGTCCATCTACATGTAAAGAACCATCAGGGCAAATTTGCTCG +>86 +CTGAGGGCTCTGTCCGTACGTGTACTATAGATCCTTGCTCCAGTATACGCCCGTTGAGA +>42 +CATATTTCAGGCGTGCGCCAACTTACGATTCTTGAATCCAAGGACTCGACGTCCGCCCC +>70 +ATCTGCTACCGTGGTTGCTTCGACCATGGTAAACTGAGTAAGCCCTTATGAGTTGCGGG +>19 +GACGCGGTACGAAGAACTGCTCCAGCAACAGCATTCCTTGGTGCCTACCGTACCTGTCG +>84 +AGGCTAAAACGATACGTTGTATACTAAGAACTGTCTACATCGTGATATGATTACTAAAG +>52 +TGAAAATGCCTGATATACGAAGATTAAGCGGCTTTGGATCATATAAGTTTTTTAGATGT +>71 +AGCCCTTATGAGTTGCGGGTCGTGCTGTTAGACTGAACACATAAAAAACGACTTCTAAA +>93 +CGGGACAGTAAACTTTTGGTGATGCCAGCACGACCAGCGCAGGGTCAAGAAAACTATTA +>58 +TCGTGGTACACCCGGAGTCTCGAAAGGAGCTTGCAAAGCTTTTCAGCATGGGTCGCATT +>22 +TTCATTCCCCTGTAACGTTTCGAACTCAACTTGCTTGCCCGACATATGGCGGTACGCGG +>50 +ACCTCGCAGATCGGCAGAGAACGGTTTGGTCTGTTTGCGTACACAATGGACGCGTTAAG +>21 +TACCGATTCGTGGCCTCCCGTTCGTCGCAATGAACGGCTTTTCATTCCCCTGTAACGTT +>73 +CCTGTTTTCCTCGCCCCATGCAATGGTAACTAATATACCGCCCCATAGTCTTAATAACC +>68 +CTGTCCCAACGGTAACAATGGAGGCACTATACCGACGCTCGCAGCGAGGTATTTAAACT +>23 +GACATATGGCGGTACGCGGGCTCAGCGCTCCGCCAGTAAGAAGAAACAGGCTAGGTCTT +>94 +AGGGTCAAGAAAACTATTAATTTAAGCGCTGTTTAGTAACTACGTGTGGCATCGTTGCA +>10 +TCTCGAACTCCGGTTAATTTGCAATCCGGGGGTTTGCTCAGCAGCAGACCAACATCCAA +>41 +GGTGATTATGGGGGACGGGTATAGTACTAATAGTTTTGGGCATATTTCAGGCGTGCGCC +>80 +TCCTACCTGAGAGGCTTCGACTAAAGAATGCGGGTATACTGGCTACTGGTTGATAAGCG +>64 +AGATCCGTTTGTTCCGGACGGTCGTCGTACCCACCCCTTGTCGATAGGTAAAGGAGTAA +>57 +TAAGCGACCATCAGAAACACAGCATCAGCTTACCAGCCTTTCGTGGTACACCCGGAGTC +>1 +GATCATCGAATCGGGCTGTCGCCAAAGGCCGACCAAGGTTCCCGTGGGGCGGGCTAATT +>55 +GATTGGTGGTGAATGACTCACAAAATGCTCATAGAATATTAGATCTGCTTGGGTGTATC +>67 
+GAATCACGGGATTTATACATCATTTATAGCTAAATTACACCTGTCCCAACGGTAACAAT +>14 +CTATTGTCGCTCCCTTAGTTGTTGGGCGTAGTCCGCACCTAGAGTCCAACCAGGCCTCG +>15 +AGAGTCCAACCAGGCCTCGACAATCCTTTGTCCTGTCCCCCGGAAGCCCAATGAGTTGT +>59 +TTTCAGCATGGGTCGCATTCCTACCTAAGGCTAGGGGCATGTTTTTGTCATCGTGCATA +>28 +CGCGAGAGGTTTCAGGTACCTATCGGGACAGACTTGTTTCTGGAAGCGCCCTCCGCTCA +>74 +CCCCATAGTCTTAATAACCGACACCGAGACGCTACATGGCGTCGGTGCCCGCGTGCGGA +>4 +TGTAACCTGTGTGGGGTCGGTCCTGGGGAAACTTTGGGTTTCATTACGGGGTGTCCATC +>65 +TCGATAGGTAAAGGAGTAAGCGTCCGACTCCCTCTTACTTGGCGCGACCAATATTCATT +>6 +ATAGAAATGAGGAGGATTCACAGACACGTCAGTCACCATCAAACGTCTAAGTAATCATG +>44 +CCAACCATGGTAGGTTAGGAAAGCCGCCAAATAAGTTCTTATACGCCACTCGACTTGGC +>3 +TCAACTCTACCCAAGCTCCTTGCATCTCGGTACCCCCCCTTGTAACCTGTGTGGGGTCG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/random-20-a.fa.part Sat Oct 17 04:02:33 2015 -0400 @@ -0,0 +1,198 @@ +>35 2 +CGCAGGCTGGATTCTAGAGGCAGAGGTGAGCTATAAGATATTGCATACGTTGAGCCAGC +>16 2 +CGGAAGCCCAATGAGTTGTCAGAGTCACCTCCACCCCGGGCCCTGTTAGCTACGTCCGT +>46 2 +GGTCGTGTTGGGTTAACAAAGGATCCCTGACTCGATCCAGCTGGGTAGGGTAACTATGT +>40 2 +GGCTGAAGGAGCGGGCGTACGTGTTTACGGCATGATGGCCGGTGATTATGGGGGACGGG +>33 2 +GCAGCGGCTTTGAATGCCGAATATATAACAGCGACGGGGTTCAATAAGCTGCACATGCG +>98 2 +ACCAGATGCATAGCCCAACAGCTGAGACATTCCCAGCTCGCGAACCAAGACGTGAGAGC +>17 2 +CCCTGTTAGCTACGTCCGTCTAAGGATATTAACATAGTTGCGACTGCGTCCTGTGCTCA +>89 2 +GCGAGATACTAGCAAAGGTTCATCAACAGCTACACCCGACGAACCCCGAGAAATTGGGA +>30 2 +GTTATGGTCCAGGATGAATGCGCGTACCGGGCGCCTATCACTCCTCTTGTCATTCAGAA +>82 2 +ATGCACTATATTTAAGAGGTCTAGAGTGTAAAAAGTGTACCCTTCGGGGTGGAGCTGTT +>60 2 +GTTTTTGTCATCGTGCATAAAGCGGGACAGAGTTCAACGGTATTCGAATGCACACCCTA +>83 2 +CCTTCGGGGTGGAGCTGTTAATGAACTCAAGTGGCGATGGAGGCTAAAACGATACGTTG +>12 2 +AGCCAATTGTAACCATATGGTATCCAGTTTCCGTAGCAGCAATGCGCGACGGGCAATCG +>85 2 +CGTGATATGATTACTAAAGGGGCCCGCAAAAACCCATTCACTGAGGGCTCTGTCCGTAC +>2 2 +CCCGTGGGGCGGGCTAATTTTAAAGGCAGGTTGCTACACGTCAACTCTACCCAAGCTCC +>45 2 +ATACGCCACTCGACTTGGCTCGCCCTCGATCTAAAATAGCGGTCGTGTTGGGTTAACAA +>11 2 +GCAGCAGACCAACATCCAACACTTTTCACAAGAGGCTGACAGCCAATTGTAACCATATG +>39 2 +CAATTGACTTCCATGTGGGTCGGCTGTCAAGTCTAAACCGGGCTGAAGGAGCGGGCGTA +>26 2 +AACATCTTAACCTCTGATCCCAACATGAGGGACATGAGTTTTCAAAGTAACGATGCGCA +>75 2 +GTCGGTGCCCGCGTGCGGAGCAGTCTTGATCCGGCGCGCTCTTACCTATGGTCGGCACG +>81 2 +GGCTACTGGTTGATAAGCGTACGTAAAAGGCGAGTCTTACATGCACTATATTTAAGAGG +>97 2 +ATTAGTGTGACTAGCCGAGTGCCCCAGCGTTTATCCAATGACCAGATGCATAGCCCAAC +>13 2 +AATGCGCGACGGGCAATCGCGTCTGCGTTGATCGTCGCCCCTATTGTCGCTCCCTTAGT +>92 2 +ATCAGGGCAAATTTGCTCGTGACTAAATGGTAATACTACCCGGGACAGTAAACTTTTGG +>56 2 +AGATCTGCTTGGGTGTATCCCCATTCAGAGATACCAGATCTAAGCGACCATCAGAAACA +>61 2 +TATTCGAATGCACACCCTAACATACTGGAAGATTCACTCTATATACCGGGAACTACTAA +>96 2 +ATTAGACCGCTATCAACTCTTGCGAGGAAGGTCTGGGCCTATTAGTGTGACTAGCCGAG +>31 2 
+CTCCTCTTGTCATTCAGAAGGAATTTGATTAATTACCTGGGCTGACTCGCGCCCCCTGC +>29 2 +TGGAAGCGCCCTCCGCTCAGGCGTTTTAGTAGATCCCAGTGTTATGGTCCAGGATGAAT +>54 2 +TGGATGAGGTCCTTAAGGCCTAATTGACCAATCGCCCCAAGATTGGTGGTGAATGACTC +>0 2 +TAGTGATCAGCGGCTAGTGTCGCCCCTCTTAGCACCTTGCGATCATCGAATCGGGCTGT +>90 2 +GAACCCCGAGAAATTGGGAAGCCTGGAGGCAGTACAGTCATCCAGTCTGCTGCTCAAAG +>34 2 +TCAATAAGCTGCACATGCGTGGTTGTGGCACGATCAGTTCCGCAGGCTGGATTCTAGAG +>43 2 +AGGACTCGACGTCCGCCCCATGCTTGAGAGAAGGTTTCGGCCAACCATGGTAGGTTAGG +>8 2 +ACACACAAGGCCAGACACCAACTTGGCCGTGGAATTTATCAACACTTCTGAGACGAAGG +>37 2 +TGTGCGCTGTGAGATACAACTATAGGCACCGGGTTGCTGGCTAATAACCATTTAGAGTC +>51 2 +ACACAATGGACGCGTTAAGGAGAACCGGTCGCAACCAGGTTGAAAATGCCTGATATACG +>32 2 +GCTGACTCGCGCCCCCTGCAGGCTGCTATGATTGAGTGCGGCAGCGGCTTTGAATGCCG +>78 2 +TCTGGGGCGAGATCCCCTCTGCTCACTTTCTTGTAGTAAATACACCGAAGGGGCGAACC +>18 2 +CGACTGCGTCCTGTGCTCAGTTCGTGACGCCGAACTCAAGGACGCGGTACGAAGAACTG +>36 2 +TTGCATACGTTGAGCCAGCGCCGCCCGTATACACAGGGTCTGTGCGCTGTGAGATACAA +>53 2 +ATATAAGTTTTTTAGATGTAAAAAATTTTTTATGGCGGCCTGGATGAGGTCCTTAAGGC +>24 2 +AAGAAACAGGCTAGGTCTTCCATGCAATGGTTCTCACAGTGTAGTCGCGCATCAACTCC +>7 2 +AAACGTCTAAGTAATCATGCGACCGGCGCCTCGATTGGACACACACAAGGCCAGACACC +>9 2 +AACACTTCTGAGACGAAGGTCATTTACGATTGGGACACTTTCTCGAACTCCGGTTAATT +>47 2 +CTGGGTAGGGTAACTATGTAGCCATCGCTCAGTGGATTCTTCCGGGATAGGGTGTGCGA +>62 2 +ATATACCGGGAACTACTAAAATTTTGGGCTACTCTATGCTTACAGCCCAACATGCGCAA +>79 2 +TACACCGAAGGGGCGAACCCTGTCTACATTCGCAAATGCATCCTACCTGAGAGGCTTCG +>48 2 +TCCGGGATAGGGTGTGCGAATGTGCCGGGCATTCAGCTCCTTAGAGACGAGTTACGAGC +>66 2 +GGCGCGACCAATATTCATTTGATGAGAATTGAAATCGACTGAATCACGGGATTTATACA +>25 2 +GTAGTCGCGCATCAACTCCGCCAGTTTTATCGAAGCGCCCAACATCTTAACCTCTGATC +>5 2 +TCATTACGGGGTGTCCATCTAGAGAAAGTGGGTTTCCCTTATAGAAATGAGGAGGATTC +>72 2 +ATAAAAAACGACTTCTAAAGCGACACTGGTTTTATCCTTCCCTGTTTTCCTCGCCCCAT +>76 2 +CTTACCTATGGTCGGCACGATTCCATTGGCGGATATAGGATTGATTACGTGTGTTTACT +>69 2 +GCAGCGAGGTATTTAAACTGTTCAATCGGCGCAACCGAAAATCTGCTACCGTGGTTGCT +>87 2 +CAGTATACGCCCGTTGAGAAACAGGTGGTGGCGCAGTGTCGATTACTTCGTAATAATTT +>27 2 
+TTCAAAGTAACGATGCGCAGATTGAATAATGCCATATCTGCGCGAGAGGTTTCAGGTAC +>77 2 +TTGATTACGTGTGTTTACTATACCGGTAGAAGCCTTCAGTTCTGGGGCGAGATCCCCTC +>95 2 +TACGTGTGGCATCGTTGCACCCTAATTCGCATTATTAAGTATTAGACCGCTATCAACTC +>63 2 +TACAGCCCAACATGCGCAACAACTATAAGCTGCTGCTGACAGATCCGTTTGTTCCGGAC +>38 2 +CTAATAACCATTTAGAGTCGCCCGCGGTGATGAGTAATCGCAATTGACTTCCATGTGGG +>20 2 +GTGCCTACCGTACCTGTCGAGCCAGTGCGATCAGTAAAACTACCGATTCGTGGCCTCCC +>88 2 +GATTACTTCGTAATAATTTGAGGGTGCTGCCGCGTGTTCCGCGAGATACTAGCAAAGGT +>49 2 +TTAGAGACGAGTTACGAGCCACTCTTGGATCGTCATGCATACCTCGCAGATCGGCAGAG +>91 2 +TCCAGTCTGCTGCTCAAAGTCCATCTACATGTAAAGAACCATCAGGGCAAATTTGCTCG +>86 2 +CTGAGGGCTCTGTCCGTACGTGTACTATAGATCCTTGCTCCAGTATACGCCCGTTGAGA +>42 2 +CATATTTCAGGCGTGCGCCAACTTACGATTCTTGAATCCAAGGACTCGACGTCCGCCCC +>70 2 +ATCTGCTACCGTGGTTGCTTCGACCATGGTAAACTGAGTAAGCCCTTATGAGTTGCGGG +>19 2 +GACGCGGTACGAAGAACTGCTCCAGCAACAGCATTCCTTGGTGCCTACCGTACCTGTCG +>84 2 +AGGCTAAAACGATACGTTGTATACTAAGAACTGTCTACATCGTGATATGATTACTAAAG +>52 2 +TGAAAATGCCTGATATACGAAGATTAAGCGGCTTTGGATCATATAAGTTTTTTAGATGT +>71 2 +AGCCCTTATGAGTTGCGGGTCGTGCTGTTAGACTGAACACATAAAAAACGACTTCTAAA +>93 2 +CGGGACAGTAAACTTTTGGTGATGCCAGCACGACCAGCGCAGGGTCAAGAAAACTATTA +>58 2 +TCGTGGTACACCCGGAGTCTCGAAAGGAGCTTGCAAAGCTTTTCAGCATGGGTCGCATT +>22 2 +TTCATTCCCCTGTAACGTTTCGAACTCAACTTGCTTGCCCGACATATGGCGGTACGCGG +>50 2 +ACCTCGCAGATCGGCAGAGAACGGTTTGGTCTGTTTGCGTACACAATGGACGCGTTAAG +>21 2 +TACCGATTCGTGGCCTCCCGTTCGTCGCAATGAACGGCTTTTCATTCCCCTGTAACGTT +>73 2 +CCTGTTTTCCTCGCCCCATGCAATGGTAACTAATATACCGCCCCATAGTCTTAATAACC +>68 2 +CTGTCCCAACGGTAACAATGGAGGCACTATACCGACGCTCGCAGCGAGGTATTTAAACT +>23 2 +GACATATGGCGGTACGCGGGCTCAGCGCTCCGCCAGTAAGAAGAAACAGGCTAGGTCTT +>94 2 +AGGGTCAAGAAAACTATTAATTTAAGCGCTGTTTAGTAACTACGTGTGGCATCGTTGCA +>10 2 +TCTCGAACTCCGGTTAATTTGCAATCCGGGGGTTTGCTCAGCAGCAGACCAACATCCAA +>41 2 +GGTGATTATGGGGGACGGGTATAGTACTAATAGTTTTGGGCATATTTCAGGCGTGCGCC +>80 2 +TCCTACCTGAGAGGCTTCGACTAAAGAATGCGGGTATACTGGCTACTGGTTGATAAGCG +>64 2 +AGATCCGTTTGTTCCGGACGGTCGTCGTACCCACCCCTTGTCGATAGGTAAAGGAGTAA +>57 2 
+TAAGCGACCATCAGAAACACAGCATCAGCTTACCAGCCTTTCGTGGTACACCCGGAGTC +>1 2 +GATCATCGAATCGGGCTGTCGCCAAAGGCCGACCAAGGTTCCCGTGGGGCGGGCTAATT +>55 2 +GATTGGTGGTGAATGACTCACAAAATGCTCATAGAATATTAGATCTGCTTGGGTGTATC +>67 2 +GAATCACGGGATTTATACATCATTTATAGCTAAATTACACCTGTCCCAACGGTAACAAT +>14 2 +CTATTGTCGCTCCCTTAGTTGTTGGGCGTAGTCCGCACCTAGAGTCCAACCAGGCCTCG +>15 2 +AGAGTCCAACCAGGCCTCGACAATCCTTTGTCCTGTCCCCCGGAAGCCCAATGAGTTGT +>59 2 +TTTCAGCATGGGTCGCATTCCTACCTAAGGCTAGGGGCATGTTTTTGTCATCGTGCATA +>28 2 +CGCGAGAGGTTTCAGGTACCTATCGGGACAGACTTGTTTCTGGAAGCGCCCTCCGCTCA +>74 2 +CCCCATAGTCTTAATAACCGACACCGAGACGCTACATGGCGTCGGTGCCCGCGTGCGGA +>4 2 +TGTAACCTGTGTGGGGTCGGTCCTGGGGAAACTTTGGGTTTCATTACGGGGTGTCCATC +>65 2 +TCGATAGGTAAAGGAGTAAGCGTCCGACTCCCTCTTACTTGGCGCGACCAATATTCATT +>6 2 +ATAGAAATGAGGAGGATTCACAGACACGTCAGTCACCATCAAACGTCTAAGTAATCATG +>44 2 +CCAACCATGGTAGGTTAGGAAAGCCGCCAAATAAGTTCTTATACGCCACTCGACTTGGC +>3 2 +TCAACTCTACCCAAGCTCCTTGCATCTCGGTACCCCCCCTTGTAACCTGTGTGGGGTCG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-abund-read-2.fa Sat Oct 17 04:02:33 2015 -0400 @@ -0,0 +1,2002 @@ +>895:1:37:17593:9954/1 +GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq 
+GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG 
+>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq 
+GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG 
+>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq 
+GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG 
+>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq 
+GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG 
+>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq 
+GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG 
+>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq 
+GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG 
+>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq 
+GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG +>seq +GGTTGACGGGGCTCAGGG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-abund-read-2.fa.below Sat Oct 17 04:02:33 2015 -0400 @@ -0,0 +1,2 @@ +>895:1:37:17593:9954/1 +GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-abund-read-2.large.oxlicg.info Sat Oct 17 04:02:33 2015 -0400 @@ -0,0 +1,4 @@ +through test-abund-read-2.fa +Total number of unique k-mers: 83 +fp rate estimated to be 0.000 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-abund-read-2.oxlicg.info Sat Oct 17 04:02:33 2015 -0400 @@ -0,0 +1,4 @@ +through /home/mcrusoe/khmer/tests/test-data/test-abund-read-2.fa +Total number of unique k-mers: 98 +fp rate estimated to be 0.000 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-abund-read-paired.fa Sat Oct 17 04:02:33 2015 -0400 @@ -0,0 +1,12 @@ +>895:1:37:17593:9954/1 +GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG +>895:1:37:17593:9954/2 +GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG +>895:1:37:17593:9954 1::FOO +GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG +>895:1:37:17593:9954 2::FOO +GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG +>895:1:37:17593:9954/1 +GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG +>895:1:37:17593:9954/2 +GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
