changeset 0:0187f18785a3 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit 37727831a2630b7a7d4fb033366cbd772c3086c8
author iuc
date Sat, 17 Oct 2015 04:02:33 -0400
parents
children
files abundance-dist-single.xml abundance-dist.xml count-median.xml do-partition.xml extract-partitions.xml filter-abund.xml filter-below-abund.py filter-below-abund.xml macros.xml normalize-by-median.xml repository_dependencies.xml test-data/normalize-by-median.c2.report.txt test-data/normalize-by-median.paired.report.txt test-data/normalize-by-median.report.txt test-data/random-20-a.fa test-data/random-20-a.fa.part test-data/test-abund-read-2.fa test-data/test-abund-read-2.fa.below test-data/test-abund-read-2.large.oxlicg test-data/test-abund-read-2.large.oxlicg.info test-data/test-abund-read-2.oxlicg test-data/test-abund-read-2.oxlicg.info test-data/test-abund-read-paired.fa
diffstat 23 files changed, 3269 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/abundance-dist-single.xml	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,94 @@
+<tool id="gedlab-khmer-abundance-dist-single" name="Abundance Distribution (all-in-one)" version="@WRAPPER_VERSION@-5">
+    <description>
+        Calculate abundance distribution of the k-mers in a given sequence file
+    </description>
+    <macros>
+        <token name="@BINARY@">abundance-dist-single.py</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version" />
+    <command><![CDATA[
+set -xu &&
+mkdir output && cd output &&
+@BINARY@
+@TABLEPARAMS@
+${zero}
+${bigcount}
+#if $save_countgraph
+    --savegraph=${optional_output_countgraph}
+#end if
+--squash
+@THREADS@
+${input_sequence_filename}
+${output_histogram_filename}
+]]>
+    </command>
+    <inputs>
+        <expand macro="input_sequence_filename" />
+        <param name="save_countgraph"
+            type="boolean"
+            label="Save the k-mer countgraph to a file"
+            help="(--savegraph)" />
+        <expand macro="input_zero" />
+        <expand macro="input_bigcount" />
+        <expand macro="tableinputs" />
+    </inputs>
+    <outputs>
+        <data name="optional_output_countgraph"
+            format="oxlicg"
+            label="${tool.name} k-mer countgraph">
+            <filter>save_countgraph == True</filter>
+        </data>
+        <expand macro="abundance-histogram-output" />
+    </outputs>
+    <tests>
+        <!-- Both tests pass the no-zero flag; the second also disables bigcount. -->
+        <test>
+            <param name="input_sequence_filename" value="test-abund-read-2.fa" />
+            <param name="type" value="specific" />
+            <param name="tablesize_specific" value="1e7" />
+            <param name="n_tables" value="2" />
+            <param name="ksize" value="17" />
+            <param name="zero" value="false" />
+            <output name="output_histogram_filename">
+                <assert_contents>
+                    <has_text text="1,96,96,0.98" />
+                    <has_text text="1001,2,98,1.0" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="input_sequence_filename" value="test-abund-read-2.fa" />
+            <param name="type" value="specific" />
+            <param name="tablesize_specific" value="1e7" />
+            <param name="n_tables" value="2" />
+            <param name="ksize" value="17" />
+            <param name="zero" value="false" />
+            <param name="bigcount" value="false" />
+            <output name="output_histogram_filename">
+                <assert_contents>
+                    <has_text text="1,96,96,0.98" />
+                    <has_text text="255,2,98,1.0" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+Calculate the abundance distribution of k-mers from a single sequence file.
+
+Note that with `-b` (`--no-bigcount`) this script is constant memory; in
+exchange, k-mer counts will stop at 255. The memory usage of this script
+with `-b` will be about 1.15x the product of the `-x` (`--max-tablesize`)
+and `-N` (`--n_tables`) numbers.
+
+
+@HELP_FOOTER@
+]]>
+    </help>
+    <citations>
+        <expand macro="software-citation" />
+        <expand macro="counting-citation" />
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/abundance-dist.xml	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,69 @@
+<tool id="gedlab-khmer-abundance-dist" name="Abundance Distribution" version="@WRAPPER_VERSION@-4">
+    <description>
+        Calculate abundance distribution of the k-mers in a given sequence file using a pre-made k-mer countgraph
+    </description>
+    <macros>
+        <token name="@BINARY@">abundance-dist.py</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version" />
+    <command><![CDATA[
+set -xu &&
+mkdir output && cd output &&
+@BINARY@
+--squash
+${zero}
+${bigcount}
+${input_countgraph_filename}
+${input_sequence_filename}
+${output_histogram_filename}
+]]>
+    </command>
+    <inputs>
+        <expand macro="input_countgraph_filename" />
+        <expand macro="input_sequence_filename" />
+        <expand macro="input_zero" />
+        <expand macro="input_bigcount" />
+    </inputs>
+    <outputs>
+        <expand macro="abundance-histogram-output" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_sequence_filename" value="test-abund-read-2.fa" />
+            <param name="input_countgraph_filename" value="test-abund-read-2.oxlicg" ftype="oxlicg" />
+            <param name="zero" value="false" />
+            <output name="output_histogram_filename">
+                <assert_contents>
+                    <has_text text="1,96,96,0.98" />
+                    <has_text text="1001,2,98,1.0" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="input_sequence_filename" value="test-abund-read-2.fa" />
+            <param name="input_countgraph_filename" value="test-abund-read-2.oxlicg" ftype="oxlicg" />
+            <param name="zero" value="false" />
+            <param name="bigcount" value="false" />
+            <output name="output_histogram_filename">
+                <assert_contents>
+                    <has_text text="1,96,96,0.98" />
+                    <has_text text="255,2,98,1.0" />
+                    </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+Calculate abundance distribution of the k-mers in the sequence file using a
+pre-made k-mer countgraph.
+
+@HELP_FOOTER@
+]]>
+    </help>
+    <citations>
+        <expand macro="software-citation" />
+        <expand macro="counting-citation" />
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/count-median.xml	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,55 @@
+<tool id="gedlab-khmer-count-median" name="Count Median" version="@WRAPPER_VERSION@-1">
+    <description>Count the median/avg k-mer abundance for each sequence in the input file</description>
+    <macros>
+        <token name="@BINARY@">count-median.py</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version" />
+    <command><![CDATA[
+set -xu &&
+@BINARY@
+$input_countgraph_filename
+$input_sequence_filename
+$output_summary_filename
+]]>
+    </command>
+    <inputs>
+        <expand macro="input_sequence_filename" />
+        <expand macro="input_countgraph_filename" />
+    </inputs>
+    <outputs>
+        <data name="output_summary_filename" format="txt"
+            label="${input_sequence_filename} sequence id, median, average, stddev, and seq length" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_sequence_filename" value="test-abund-read-2.fa" />
+            <param name="input_countgraph_filename" value="test-abund-read-2.oxlicg" ftype="oxlicg" />
+            <output name="output_summary_filename">
+                <assert_contents>
+                    <has_text text="seq,1001,1001.0,0.0,18" />
+                    <has_text text="895:1:37:17593:9954/1,1,21.408163071,141.391921997,114" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+Count the median/avg k-mer abundance for each sequence in the input file,
+based on the k-mer counts in the given k-mer countgraph. Can be used to
+estimate expression levels (mRNAseq) or coverage (genomic/metagenomic). The
+output file contains sequence id, median, average, stddev, and seq length;
+fields are separated by spaces. For khmer 1.x count-median.py will split
+sequence names at the first space which means that some sequence formats (e.g.
+paired FASTQ in Casava 1.8 format) will yield uninformative names. Use
+`--csv` to fix this behavior.
+
+@HELP_FOOTER@
+]]>
+    </help>
+    <citations>
+        <expand macro="software-citation" />
+        <expand macro="diginorm-citation" />
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/do-partition.xml	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,58 @@
+<tool id="gedlab-khmer-do-partition" name="Sequence partition all-in-one" version="@WRAPPER_VERSION@-4">
+    <description>Load, partition, and annotate FAST[AQ] sequences</description>
+    <macros>
+        <token name="@BINARY@">do-partition.py</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version" />
+    <command><![CDATA[
+set -xu &&
+#for $num, $input in enumerate($inputs)
+ln -s ${input} sequence-${num} &&
+#end for
+mkdir -p output && cd output &&
+@BINARY@
+@TABLEPARAMS@
+@THREADS@
+output
+../sequence-*
+&&
+mv output.info $information
+]]>
+    </command>
+    <inputs>
+        <expand macro="input_sequences_filenames" />
+        <expand macro="tableinputs" />
+    </inputs>
+    <outputs>
+        <data name="information" format="txt" label="${tool.name} summary." />
+        <collection name="annotated_sequence_files" type="list">
+            <discover_datasets pattern="__name__" directory="output" />
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="inputs" value="random-20-a.fa"/>
+            <output_collection name="annotated_sequence_files" type="list">
+                <element name="sequence-0.part" file="random-20-a.fa.part" />
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+Load in a set of sequences, partition them, merge the partitions, and
+annotate the original sequences files with the partition information.
+
+This script combines the functionality of `load-graph.py`,
+`partition-graph.py`, `merge-partitions.py`, and
+`annotate-partitions.py` into one script. This is convenient
+but should probably not be used for large data sets, because
+`do-partition.py` doesn't provide save/resume functionality.
+]]>
+    </help>
+    <citations>
+        <expand macro="software-citation" />
+        <expand macro="graph-citation" />
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract-partitions.xml	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,60 @@
+<tool id="gedlab-khmer-extract-partitions" name="Extract partitions" version="@WRAPPER_VERSION@-2">
+    <description>Separate sequences that are annotated with partitions into grouped files</description>
+    <macros>
+        <token name="@BINARY@">extract-partitions.py</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version" />
+    <command><![CDATA[
+set -xu &&
+mkdir -p output &&
+cd output &&
+@BINARY@
+--max-size $max_size
+--min-partition-size $min_partition_size
+$output_unassigned
+output
+#for input in $inputs
+$input
+#end for
+&&
+mv output.dist $distribution
+]]>
+    </command>
+    <inputs>
+        <expand macro="input_sequences_filenames" />
+        <param name="max_size" type="integer" label="Max group size" value="1000000"
+            help="No more than this many number of sequences will be stored in each output (--max-size/-X)"/>
+        <param name="min_partition_size" type="integer" label="Min partition size" value="5"
+            help="The minimum partition size worth keeping (--min-partition-size/-m)" />
+        <param name="output_unassigned" type="boolean" checked="false" truevalue="--output-unassigned" falsevalue=""
+            label="Output unassigned sequences" help="(--output-unassigned/-U)" />
+    </inputs>
+    <outputs>
+        <data name="distribution" format="txt" label="Partition size distribution from ${tool.name}" />
+        <collection name="groups-of-partitions" type="list">
+            <discover_datasets pattern="__name__" directory="output" />
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="inputs" value="random-20-a.fa.part"/>
+            <output_collection name="groups-of-partitions">
+                <element name="output.group0000.part" file="random-20-a.part.extract.fa" />
+            </output_collection>
+        </test>
+
+    </tests>
+    <help><![CDATA[
+Separate sequences that are annotated with partitions into grouped files.
+
+@HELP_FOOTER@
+]]>
+    </help>
+    <citations>
+        <expand macro="software-citation" />
+        <expand macro="graph-citation" />
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filter-abund.xml	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,81 @@
+<tool id="gedlab-khmer-filter-abund" name="Filter by abundance" version="@WRAPPER_VERSION@-5">
+    <description>
+        Trims fastq/fasta sequences at k-mers of a given abundance
+        based on a provided k-mer countgraph
+    </description>
+    <macros>
+        <token name="@BINARY@">filter-abund.py</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version" />
+    <command><![CDATA[
+set -xu &&
+#for $num, $input in enumerate($inputs)
+ln -s ${input} filter-abund-sequence-${num} &&
+#end for
+mkdir output && cd output &&
+@BINARY@
+--cutoff=${cutoff}
+${variable_coverage}
+@THREADS@
+${input_countgraph_filename}
+../filter-abund-sequence*
+]]>
+    </command>
+    <inputs>
+        <expand macro="input_sequences_filenames" />
+        <param name="variable_coverage" type="boolean" checked="false" truevalue="--variable-coverage" falsevalue=""
+            label="Variable coverage"
+            help="Only trim when a sequence has high enough coverage; median abundance > 20 (--variable-coverage)" />
+        <param name="cutoff" type="integer" value="2" label="Cutoff"
+            help="Trim at k-mers below this abundance. (--cutoff)" />
+        <expand macro="input_countgraph_filename" />
+    </inputs>
+    <outputs>
+        <collection name="filter-abund-sequences" type="list">
+            <discover_datasets pattern="__name__" directory="output" />
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="inputs" value="test-abund-read-2.fa" />
+            <param name="input_countgraph_filename"
+                value="test-abund-read-2.oxlicg" ftype="oxlicg" />
+            <output_collection name="filter-abund-sequences" type="list">
+                <element name="filter-abund-sequence-0.abundfilt"> 
+                    <assert_contents>
+                        <has_text text="GGTTGACGGGGCTCAGGG" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <param name="inputs" value="test-abund-read-2.fa" />
+            <param name="input_countgraph_filename"
+                value="test-abund-read-2.oxlicg" ftype="oxlicg" />
+            <param name="cutoff" value="1" />
+            <output_collection name="filter-abund-sequences" type="list">
+                <element name="filter-abund-sequence-0.abundfilt">
+                    <assert_contents>
+                        <has_text text="GGTTGACGGGGCTCAGGG" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+Trim sequences at a minimum k-mer abundance.
+
+If the input sequences are from RNAseq or metagenome sequencing then
+`--variable-coverage` should be used.
+
+@HELP_FOOTER@
+]]>
+    </help>
+    <citations>
+        <expand macro="software-citation" />
+        <expand macro="counting-citation" />
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filter-below-abund.py	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,87 @@
+#! /usr/bin/env python
+# This file is part of khmer, https://github.com/dib-lab/khmer/, and is
+# Copyright (C) 2011-2015, Michigan State University.
+# Copyright (C) 2015, The Regents of the University of California.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#
+#     * Neither the name of the Michigan State University nor the names
+#       of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written
+#       permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Contact: khmer-project@idyll.org
+from __future__ import print_function
+import sys
+import os
+import khmer
+from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter
+
+WORKER_THREADS = 8
+GROUPSIZE = 100
+
+CUTOFF = 50
+
+###
+
+
+def main():
+    counting_ht = sys.argv[1]
+    infiles = sys.argv[2:]
+
+    print('file with ht: %s' % counting_ht)
+    print('-- settings:')
+    print('N THREADS', WORKER_THREADS)
+    print('--')
+
+    print('making hashtable')
+    ht = khmer.load_countgraph(counting_ht)
+    K = ht.ksize()
+
+    for infile in infiles:
+        print('filtering', infile)
+        outfile = os.path.basename(infile) + '.below'
+
+        outfp = open(outfile, 'w')
+
+        def process_fn(record, ht=ht):
+            name = record['name']
+            seq = record['sequence']
+            if 'N' in seq:
+                return None, None
+
+            trim_seq, trim_at = ht.trim_below_abundance(seq, CUTOFF)
+
+            if trim_at >= K:
+                return name, trim_seq
+
+            return None, None
+
+        tsp = ThreadedSequenceProcessor(process_fn, WORKER_THREADS, GROUPSIZE)
+
+        tsp.start(verbose_fasta_iter(infile), outfp)
+
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filter-below-abund.xml	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,50 @@
+<tool id="gedlab-khmer-filter-below-abund" name="Filter below abundance cutoff of 50" version="@WRAPPER_VERSION@-3">
+    <description>
+        Trims fastq/fasta sequences at k-mers with abundance below 50
+        based on a provided k-mer countgraph
+    </description>
+    <macros>
+        <token name="@BINARY@">filter-below-abund.py</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version" />
+    <command><![CDATA[
+set -xu &&
+#for $num, $input in enumerate($inputs)
+ln -s ${input} sequence-${num} &&
+#end for
+mkdir output && cd output &&
+@BINARY@ ${input_countgraph_filename}
+../sequence*
+]]>
+    </command>
+    <inputs>
+        <expand macro="input_sequences_filenames" />
+        <expand macro="input_countgraph_filename" />
+    </inputs>
+    <outputs>
+        <collection name="sequence_files" type="list">
+            <discover_datasets pattern="__name__" directory="output" />
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="inputs" value="test-abund-read-2.fa" />
+            <param name="input_countgraph_filename" value="test-abund-read-2.large.oxlicg" ftype="oxlicg" />
+            <output_collection name="sequence_files">
+                <element name="sequence-0.below" file="test-abund-read-2.fa.below" />
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+Trims sequences at the first kmer with abundance above 50.
+
+@HELP_FOOTER@
+]]></help>
+    <citations>
+        <expand macro="software-citation" />
+        <expand macro="counting-citation" />
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,146 @@
+<macros>
+    <token name="@WRAPPER_VERSION@">2.0</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@WRAPPER_VERSION@">khmer</requirement>
+        </requirements>
+    </xml>
+    <xml name="version">
+        <version_command>@BINARY@ --version</version_command>
+    </xml>
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" level="fatal" />
+        </stdio>
+    </xml>
+    <token name="@TABLEPARAMS@">
+#if $parameters.type == "simple"
+  --ksize=20
+  --n_tables=4
+  --max-tablesize=$parameters.tablesize
+#else
+  --ksize=$parameters.ksize
+  --n_tables=$parameters.n_tables
+  --max-tablesize="$parameters.tablesize_specific"
+#end if
+    </token>
+    <token name="@THREADS@">--threads \${GALAXY_SLOTS:-4}</token>
+    <xml name="tableinputs">
+        <conditional name="parameters">
+            <param name="type" type="select" label="Advanced Parameters"
+                help="ksize, n_tables, a specific tablesize" >
+                <option value="simple" selected="true">Hide</option>
+                <option value="specific">Show</option>
+            </param>
+            <when value="simple">
+                <param name="tablesize" type="select" label="Sample Type" display="radio">
+                    <option value="1e9" selected="true">Microbial Genome</option>
+                    <option value="2e9">Animal Transcriptome</option>
+                    <option value="4e9">Small Animal Genome or Low-Diversity Metagenome</option>
+                    <option value="16e9">Large Animal Genome</option>
+                </param>
+            </when>
+            <when value="specific">
+                <param name="ksize" type="integer" value="20" label="k-mer size" help="k-mer size to use" />
+                <param name="n_tables" type="integer" min="1" value="4" label="n_tables" help="number of tables to use" />
+                <param name="tablesize_specific" type="text" value="1000000.0" 
+                    label="tablesize" help="(--max-tablesize) upper bound on the tablesize to use" />
+            </when>
+        </conditional>
+    </xml>
+    <xml name="input_sequences_filenames">
+        <param  name="inputs" multiple="true" type="data" format="fasta,fastq"
+            label="Sequences in FASTA or FASTQ format"
+            help="Put in order of precedence such as longest reads first." />
+    </xml>
+    <xml name="input_sequence_filename">
+        <param  name="input_sequence_filename" type="data" format="fasta,fastq"
+            label="Sequence in FASTA or FASTQ format" />
+    </xml>
+    <xml name="input_countgraph_filename">
+        <param  name="input_countgraph_filename" type="data" format="oxlicg"
+            label="the k-mer countgraph to query"
+            help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer countgraph." />
+    </xml>
+    <xml name="abundance-histogram-output">
+        <data name="output_histogram_filename" format="txt"
+            label="${tool.name} k-mer abundance histogram. The
+                columns are: (1) k-mer abundance, (2) k-mer count, (3)
+                cumulative count, (4) fraction of total distinct k-mers." />
+    </xml>
+    <xml name="output_sequences">
+        <data name="output" format_source="inputs"
+            label="${tool.name} processed nucleotide sequence file">
+            <discover_datasets pattern="__designation_and_ext__" directory="output" visible="true"/>
+        </data>
+    </xml>
+    <xml name="output_sequences_single">
+        <data name="output" format_source="input_sequence_filename"
+            label="${tool.name} processed nucleotide sequence file" />
+    </xml>
+    <xml name="input_zero">
+        <param name="zero" type="boolean" truevalue="" falsevalue="--no-zero" checked="true"
+            help="Output zero count bins (--no-zero)" />
+    </xml>
+    <xml name="input_bigcount">
+        <param name="bigcount" type="boolean" truevalue="" falsevalue="--no-bigcount"
+            checked="true" help="Count k-mers past 255 occurrences (--no-bigcount)" />
+    </xml>
+    <token name="@HELP_FOOTER@"><![CDATA[
+(from the khmer project: http://khmer.readthedocs.org/en/v@WRAPPER_VERSION@/ )]]></token>
+    <xml name="software-citation">
+        <citation type="bibtex">@article{khmer2015,
+     author = "Crusoe, Michael R. and Alameldin, Hussien F. and Awad, Sherine
+  and Bucher, Elmar and Caldwell, Adam and Cartwright, Reed and Charbonneau,
+  Amanda and Constantinides, Bede and Edvenson, Greg and Fay, Scott and Fenton,
+  Jacob and Fenzl, Thomas and Fish, Jordan and Garcia-Gutierrez, Leonor and
+  Garland, Phillip and Gluck, Jonathan and González, Iván and Guermond, Sarah
+  and Guo, Jiarong and Gupta, Aditi and Herr, Joshua R. and Howe, Adina and
+  Hyer, Alex and Härpfer, Andreas and Irber, Luiz and Kidd, Rhys and Lin, David
+  and Lippi, Justin and Mansour, Tamer and McA'Nulty, Pamela and McDonald, Eric
+  and Mizzi, Jessica and Murray, Kevin D. and Nahum, Joshua R. and Nanlohy,
+  Kaben and Nederbragt, Alexander Johan and Ortiz-Zuazaga, Humberto and Ory,
+  Jeramia and Pell, Jason and Pepe-Ranney, Charles and Russ, Zachary N and
+  Schwarz, Erich and Scott, Camille and Seaman, Josiah and Sievert, Scott and
+  Simpson, Jared and Skennerton, Connor T. and Spencer, James and Srinivasan,
+  Ramakrishnan and Standage, Daniel and Stapleton, James A. and Stein, Joe and
+  Steinman, Susan R and Taylor, Benjamin and Trimble, Will and Wiencko, Heather
+  L. and Wright, Michael and Wyss, Brian and Zhang, Qingpeng and zyme, en and
+  Brown, C. Titus",
+     title = "The khmer software package: enabling efficient nucleotide
+  sequence analysis",
+     year = "2015",
+     month = "08",
+     publisher = "F1000",
+     url = "http://dx.doi.org/10.12688/f1000research.6924.1"
+  }</citation>
+    </xml>
+    <xml name="diginorm-citation">
+        <citation type="bibtex">@unpublished{diginorm,
+    author = "Brown, C Titus and Howe, Adina and Zhang, Qingpeng and Pyrkosz,
+Alexis B and Brom, Timothy H",
+    title = "A Reference-Free Algorithm for Computational Normalization of
+Shotgun Sequencing Data",
+    year = "2012",
+    eprint = "arXiv:1203.4802",
+    url = "http://arxiv.org/abs/1203.4802",
+}</citation></xml>
+    <xml name="graph-citation">
+        <citation type="bibtex">@article{Pell2012,
+  doi = {10.1073/pnas.1121464109},
+  url = {http://dx.doi.org/10.1073/pnas.1121464109},
+  year  = {2012},
+  month = {jul},
+  publisher = {Proceedings of the National Academy of Sciences},
+  volume = {109},
+  number = {33},
+  pages = {13272--13277},
+  author = {J. Pell and A. Hintze and R. Canino-Koning and A. Howe and J. M. Tiedje and C. T. Brown},
+  title = {Scaling metagenome sequence assembly with probabilistic de Bruijn graphs},
+  journal = {Proceedings of the National Academy of Sciences}
+  }</citation>
+    </xml>
+    <xml name="counting-citation">
+        <citation type="doi">10.1371/journal.pone.0101271</citation>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/normalize-by-median.xml	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,139 @@
+<tool id="gedlab-khmer-normalize-by-median" name="Normalize By Median" version="@WRAPPER_VERSION@-4">
+    <!-- Galaxy wrapper around khmer's normalize-by-median.py: digital normalization of one or more read files, with a report and an optional saved countgraph. -->
+    <description>Filters a fastq/fasta file using digital normalization via median k-mer abundances</description>
+    <macros>
+        <token name="@BINARY@">normalize-by-median.py</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version" />
+    <command><![CDATA[
+set -xu &&
+## Link every input as sequence-N in the job directory, then run inside output/ so the <fileN>.keep files are written there.
+#for $num, $input in enumerate($inputs)
+    ln -s '${input}' sequence-${num} &&
+#end for
+mkdir output && cd output &&
+@BINARY@
+${paired_switch}
+${force_single_switch}
+@TABLEPARAMS@
+--cutoff=${cutoff}
+#if $unpaired_reads_filename
+    --unpaired-reads='${unpaired_reads_filename}'
+#end if
+#if $save_countgraph
+    --savegraph='${countgraph}'
+#end if
+#if $countgraph_to_load
+    --loadgraph='${countgraph_to_load}'
+#end if
+--report='${report}'
+../sequence-*
+]]></command>
+    <inputs>
+        <expand macro="input_sequences_filenames" />
+        <param name="paired_switch" type="boolean" checked="false" truevalue="--paired" falsevalue=""
+            label="Require all sequences be properly paired?"
+            help="(--paired) The tool will fail if given improperly paired reads and this option is selected." />
+        <param name="force_single_switch" type="boolean" checked="false" truevalue="--force_single" falsevalue=""
+            label="Ignore all pairing information?"
+            help="(--force_single) By default this tool processes reads in a pair-aware manner. This option disables that behavior." />
+        <param name="unpaired_reads_filename" type="data" format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" optional="true"
+            label="Extra unpaired reads"
+            help="(--unpaired-reads) If all but one of your sequence files are interleaved paired end reads you can include one unpaired file to be processed last without regard to pairing." />
+        <param name="countgraph_to_load" type="data" format="oxlicg" optional="true"
+            label="Optional k-mer countgraph"
+            help="(--loadgraph) The input file(s) will be processed using the k-mer counts in the specified k-mer countgraph file as a starting point." />
+        <param name="save_countgraph" type="boolean" label="Save the k-mer countgraph(s) in a file" help="(--savegraph)" />
+        <param name="cutoff" type="integer" min="1" value="20" label="Cutoff" help="(--cutoff)" />
+        <expand macro="tableinputs" />
+    </inputs>
+    <outputs>
+        <data name="countgraph" format="oxlicg" label="${tool.name} k-mer countgraph">
+            <filter>save_countgraph == True</filter>
+        </data>
+        <data name="report" format="txt" label="${tool.name} report" />
+        <!-- One list element per file found in output/, named after the file itself (e.g. sequence-0.keep). -->
+        <collection name="sequences" type="list">
+            <discover_datasets pattern="__name__" directory="output" />
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="inputs" value="test-abund-read-2.fa" />
+            <param name="type" value="specific" />
+            <param name="cutoff" value="1" />
+            <param name="ksize" value="17" />
+            <output name="report" file="normalize-by-median.report.txt" />
+            <output_collection name="sequences" type="list">
+                <element name="sequence-0.keep">
+                    <assert_contents>
+                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <param name="inputs" value="test-abund-read-2.fa" />
+            <param name="type" value="specific" />
+            <param name="cutoff" value="2" />
+            <param name="ksize" value="17" />
+            <output name="report" file="normalize-by-median.c2.report.txt" />
+            <output_collection name="sequences" type="list">
+                <element name="sequence-0.keep">
+                    <assert_contents>
+                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
+                        <has_text text="GGTTGACGGGGCTCAGGG" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <param name="inputs" value="test-abund-read-paired.fa" />
+            <param name="type" value="specific" />
+            <param name="cutoff" value="1" />
+            <param name="ksize" value="17" />
+            <param name="paired_switch" value="true" />
+            <output name="report" file="normalize-by-median.paired.report.txt" />
+            <output_collection name="sequences" type="list">
+                <element name="sequence-0.keep">
+                    <assert_contents>
+                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
+                        <has_text text="GGTTGACGGGGCTCAGGG" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+Do digital normalization (remove mostly redundant sequences)
+
+Discard sequences based on whether or not their median k-mer abundance lies
+above a specified cutoff. Kept sequences will be placed in <fileN>.keep.
+
+By default, paired-end reads will be considered together; if either read will
+be kept, then both will be kept. (This keeps both reads from a fragment, and
+helps with retention of repeats.) Unpaired reads are treated individually.
+
+If `--paired` is set then proper pairing is required and the tool will exit on
+unpaired reads, although `--unpaired-reads` can be used to supply a file of
+orphan reads to be read after the paired reads.
+
+`--force_single` will ignore all pairing information and treat reads
+individually.
+
+With `-s`/`--savegraph`, the k-mer countgraph will be saved to the specified
+file after all sequences have been processed. `--loadgraph` will load the
+specified k-mer countgraph before processing the specified files.  Note
+that the countgraph is in the same format as those produced by
+`load-into-counting.py` and consumed by `abundance-dist.py`.
+
+@HELP_FOOTER@
+]]></help>
+    <citations>
+        <expand macro="software-citation" />
+        <expand macro="diginorm-citation" />
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories description="We require the khmer package and the oxli datatype definitions."> <!-- NOTE(review): only the khmer package repository is listed below; no datatype-definition repository appears here. Confirm whether one is still required. -->
+    <repository changeset_revision="be6719477611" name="package_khmer_2_0" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+</repositories>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/normalize-by-median.c2.report.txt	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,2 @@
+total,kept,f_kept
+1001,2,0.001998
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/normalize-by-median.paired.report.txt	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,2 @@
+total,kept,f_kept
+6,2,0.3333
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/normalize-by-median.report.txt	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,2 @@
+total,kept,f_kept
+1001,1,0.000999
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/random-20-a.fa	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,198 @@
+>35
+CGCAGGCTGGATTCTAGAGGCAGAGGTGAGCTATAAGATATTGCATACGTTGAGCCAGC
+>16
+CGGAAGCCCAATGAGTTGTCAGAGTCACCTCCACCCCGGGCCCTGTTAGCTACGTCCGT
+>46
+GGTCGTGTTGGGTTAACAAAGGATCCCTGACTCGATCCAGCTGGGTAGGGTAACTATGT
+>40
+GGCTGAAGGAGCGGGCGTACGTGTTTACGGCATGATGGCCGGTGATTATGGGGGACGGG
+>33
+GCAGCGGCTTTGAATGCCGAATATATAACAGCGACGGGGTTCAATAAGCTGCACATGCG
+>98
+ACCAGATGCATAGCCCAACAGCTGAGACATTCCCAGCTCGCGAACCAAGACGTGAGAGC
+>17
+CCCTGTTAGCTACGTCCGTCTAAGGATATTAACATAGTTGCGACTGCGTCCTGTGCTCA
+>89
+GCGAGATACTAGCAAAGGTTCATCAACAGCTACACCCGACGAACCCCGAGAAATTGGGA
+>30
+GTTATGGTCCAGGATGAATGCGCGTACCGGGCGCCTATCACTCCTCTTGTCATTCAGAA
+>82
+ATGCACTATATTTAAGAGGTCTAGAGTGTAAAAAGTGTACCCTTCGGGGTGGAGCTGTT
+>60
+GTTTTTGTCATCGTGCATAAAGCGGGACAGAGTTCAACGGTATTCGAATGCACACCCTA
+>83
+CCTTCGGGGTGGAGCTGTTAATGAACTCAAGTGGCGATGGAGGCTAAAACGATACGTTG
+>12
+AGCCAATTGTAACCATATGGTATCCAGTTTCCGTAGCAGCAATGCGCGACGGGCAATCG
+>85
+CGTGATATGATTACTAAAGGGGCCCGCAAAAACCCATTCACTGAGGGCTCTGTCCGTAC
+>2
+CCCGTGGGGCGGGCTAATTTTAAAGGCAGGTTGCTACACGTCAACTCTACCCAAGCTCC
+>45
+ATACGCCACTCGACTTGGCTCGCCCTCGATCTAAAATAGCGGTCGTGTTGGGTTAACAA
+>11
+GCAGCAGACCAACATCCAACACTTTTCACAAGAGGCTGACAGCCAATTGTAACCATATG
+>39
+CAATTGACTTCCATGTGGGTCGGCTGTCAAGTCTAAACCGGGCTGAAGGAGCGGGCGTA
+>26
+AACATCTTAACCTCTGATCCCAACATGAGGGACATGAGTTTTCAAAGTAACGATGCGCA
+>75
+GTCGGTGCCCGCGTGCGGAGCAGTCTTGATCCGGCGCGCTCTTACCTATGGTCGGCACG
+>81
+GGCTACTGGTTGATAAGCGTACGTAAAAGGCGAGTCTTACATGCACTATATTTAAGAGG
+>97
+ATTAGTGTGACTAGCCGAGTGCCCCAGCGTTTATCCAATGACCAGATGCATAGCCCAAC
+>13
+AATGCGCGACGGGCAATCGCGTCTGCGTTGATCGTCGCCCCTATTGTCGCTCCCTTAGT
+>92
+ATCAGGGCAAATTTGCTCGTGACTAAATGGTAATACTACCCGGGACAGTAAACTTTTGG
+>56
+AGATCTGCTTGGGTGTATCCCCATTCAGAGATACCAGATCTAAGCGACCATCAGAAACA
+>61
+TATTCGAATGCACACCCTAACATACTGGAAGATTCACTCTATATACCGGGAACTACTAA
+>96
+ATTAGACCGCTATCAACTCTTGCGAGGAAGGTCTGGGCCTATTAGTGTGACTAGCCGAG
+>31
+CTCCTCTTGTCATTCAGAAGGAATTTGATTAATTACCTGGGCTGACTCGCGCCCCCTGC
+>29
+TGGAAGCGCCCTCCGCTCAGGCGTTTTAGTAGATCCCAGTGTTATGGTCCAGGATGAAT
+>54
+TGGATGAGGTCCTTAAGGCCTAATTGACCAATCGCCCCAAGATTGGTGGTGAATGACTC
+>0
+TAGTGATCAGCGGCTAGTGTCGCCCCTCTTAGCACCTTGCGATCATCGAATCGGGCTGT
+>90
+GAACCCCGAGAAATTGGGAAGCCTGGAGGCAGTACAGTCATCCAGTCTGCTGCTCAAAG
+>34
+TCAATAAGCTGCACATGCGTGGTTGTGGCACGATCAGTTCCGCAGGCTGGATTCTAGAG
+>43
+AGGACTCGACGTCCGCCCCATGCTTGAGAGAAGGTTTCGGCCAACCATGGTAGGTTAGG
+>8
+ACACACAAGGCCAGACACCAACTTGGCCGTGGAATTTATCAACACTTCTGAGACGAAGG
+>37
+TGTGCGCTGTGAGATACAACTATAGGCACCGGGTTGCTGGCTAATAACCATTTAGAGTC
+>51
+ACACAATGGACGCGTTAAGGAGAACCGGTCGCAACCAGGTTGAAAATGCCTGATATACG
+>32
+GCTGACTCGCGCCCCCTGCAGGCTGCTATGATTGAGTGCGGCAGCGGCTTTGAATGCCG
+>78
+TCTGGGGCGAGATCCCCTCTGCTCACTTTCTTGTAGTAAATACACCGAAGGGGCGAACC
+>18
+CGACTGCGTCCTGTGCTCAGTTCGTGACGCCGAACTCAAGGACGCGGTACGAAGAACTG
+>36
+TTGCATACGTTGAGCCAGCGCCGCCCGTATACACAGGGTCTGTGCGCTGTGAGATACAA
+>53
+ATATAAGTTTTTTAGATGTAAAAAATTTTTTATGGCGGCCTGGATGAGGTCCTTAAGGC
+>24
+AAGAAACAGGCTAGGTCTTCCATGCAATGGTTCTCACAGTGTAGTCGCGCATCAACTCC
+>7
+AAACGTCTAAGTAATCATGCGACCGGCGCCTCGATTGGACACACACAAGGCCAGACACC
+>9
+AACACTTCTGAGACGAAGGTCATTTACGATTGGGACACTTTCTCGAACTCCGGTTAATT
+>47
+CTGGGTAGGGTAACTATGTAGCCATCGCTCAGTGGATTCTTCCGGGATAGGGTGTGCGA
+>62
+ATATACCGGGAACTACTAAAATTTTGGGCTACTCTATGCTTACAGCCCAACATGCGCAA
+>79
+TACACCGAAGGGGCGAACCCTGTCTACATTCGCAAATGCATCCTACCTGAGAGGCTTCG
+>48
+TCCGGGATAGGGTGTGCGAATGTGCCGGGCATTCAGCTCCTTAGAGACGAGTTACGAGC
+>66
+GGCGCGACCAATATTCATTTGATGAGAATTGAAATCGACTGAATCACGGGATTTATACA
+>25
+GTAGTCGCGCATCAACTCCGCCAGTTTTATCGAAGCGCCCAACATCTTAACCTCTGATC
+>5
+TCATTACGGGGTGTCCATCTAGAGAAAGTGGGTTTCCCTTATAGAAATGAGGAGGATTC
+>72
+ATAAAAAACGACTTCTAAAGCGACACTGGTTTTATCCTTCCCTGTTTTCCTCGCCCCAT
+>76
+CTTACCTATGGTCGGCACGATTCCATTGGCGGATATAGGATTGATTACGTGTGTTTACT
+>69
+GCAGCGAGGTATTTAAACTGTTCAATCGGCGCAACCGAAAATCTGCTACCGTGGTTGCT
+>87
+CAGTATACGCCCGTTGAGAAACAGGTGGTGGCGCAGTGTCGATTACTTCGTAATAATTT
+>27
+TTCAAAGTAACGATGCGCAGATTGAATAATGCCATATCTGCGCGAGAGGTTTCAGGTAC
+>77
+TTGATTACGTGTGTTTACTATACCGGTAGAAGCCTTCAGTTCTGGGGCGAGATCCCCTC
+>95
+TACGTGTGGCATCGTTGCACCCTAATTCGCATTATTAAGTATTAGACCGCTATCAACTC
+>63
+TACAGCCCAACATGCGCAACAACTATAAGCTGCTGCTGACAGATCCGTTTGTTCCGGAC
+>38
+CTAATAACCATTTAGAGTCGCCCGCGGTGATGAGTAATCGCAATTGACTTCCATGTGGG
+>20
+GTGCCTACCGTACCTGTCGAGCCAGTGCGATCAGTAAAACTACCGATTCGTGGCCTCCC
+>88
+GATTACTTCGTAATAATTTGAGGGTGCTGCCGCGTGTTCCGCGAGATACTAGCAAAGGT
+>49
+TTAGAGACGAGTTACGAGCCACTCTTGGATCGTCATGCATACCTCGCAGATCGGCAGAG
+>91
+TCCAGTCTGCTGCTCAAAGTCCATCTACATGTAAAGAACCATCAGGGCAAATTTGCTCG
+>86
+CTGAGGGCTCTGTCCGTACGTGTACTATAGATCCTTGCTCCAGTATACGCCCGTTGAGA
+>42
+CATATTTCAGGCGTGCGCCAACTTACGATTCTTGAATCCAAGGACTCGACGTCCGCCCC
+>70
+ATCTGCTACCGTGGTTGCTTCGACCATGGTAAACTGAGTAAGCCCTTATGAGTTGCGGG
+>19
+GACGCGGTACGAAGAACTGCTCCAGCAACAGCATTCCTTGGTGCCTACCGTACCTGTCG
+>84
+AGGCTAAAACGATACGTTGTATACTAAGAACTGTCTACATCGTGATATGATTACTAAAG
+>52
+TGAAAATGCCTGATATACGAAGATTAAGCGGCTTTGGATCATATAAGTTTTTTAGATGT
+>71
+AGCCCTTATGAGTTGCGGGTCGTGCTGTTAGACTGAACACATAAAAAACGACTTCTAAA
+>93
+CGGGACAGTAAACTTTTGGTGATGCCAGCACGACCAGCGCAGGGTCAAGAAAACTATTA
+>58
+TCGTGGTACACCCGGAGTCTCGAAAGGAGCTTGCAAAGCTTTTCAGCATGGGTCGCATT
+>22
+TTCATTCCCCTGTAACGTTTCGAACTCAACTTGCTTGCCCGACATATGGCGGTACGCGG
+>50
+ACCTCGCAGATCGGCAGAGAACGGTTTGGTCTGTTTGCGTACACAATGGACGCGTTAAG
+>21
+TACCGATTCGTGGCCTCCCGTTCGTCGCAATGAACGGCTTTTCATTCCCCTGTAACGTT
+>73
+CCTGTTTTCCTCGCCCCATGCAATGGTAACTAATATACCGCCCCATAGTCTTAATAACC
+>68
+CTGTCCCAACGGTAACAATGGAGGCACTATACCGACGCTCGCAGCGAGGTATTTAAACT
+>23
+GACATATGGCGGTACGCGGGCTCAGCGCTCCGCCAGTAAGAAGAAACAGGCTAGGTCTT
+>94
+AGGGTCAAGAAAACTATTAATTTAAGCGCTGTTTAGTAACTACGTGTGGCATCGTTGCA
+>10
+TCTCGAACTCCGGTTAATTTGCAATCCGGGGGTTTGCTCAGCAGCAGACCAACATCCAA
+>41
+GGTGATTATGGGGGACGGGTATAGTACTAATAGTTTTGGGCATATTTCAGGCGTGCGCC
+>80
+TCCTACCTGAGAGGCTTCGACTAAAGAATGCGGGTATACTGGCTACTGGTTGATAAGCG
+>64
+AGATCCGTTTGTTCCGGACGGTCGTCGTACCCACCCCTTGTCGATAGGTAAAGGAGTAA
+>57
+TAAGCGACCATCAGAAACACAGCATCAGCTTACCAGCCTTTCGTGGTACACCCGGAGTC
+>1
+GATCATCGAATCGGGCTGTCGCCAAAGGCCGACCAAGGTTCCCGTGGGGCGGGCTAATT
+>55
+GATTGGTGGTGAATGACTCACAAAATGCTCATAGAATATTAGATCTGCTTGGGTGTATC
+>67
+GAATCACGGGATTTATACATCATTTATAGCTAAATTACACCTGTCCCAACGGTAACAAT
+>14
+CTATTGTCGCTCCCTTAGTTGTTGGGCGTAGTCCGCACCTAGAGTCCAACCAGGCCTCG
+>15
+AGAGTCCAACCAGGCCTCGACAATCCTTTGTCCTGTCCCCCGGAAGCCCAATGAGTTGT
+>59
+TTTCAGCATGGGTCGCATTCCTACCTAAGGCTAGGGGCATGTTTTTGTCATCGTGCATA
+>28
+CGCGAGAGGTTTCAGGTACCTATCGGGACAGACTTGTTTCTGGAAGCGCCCTCCGCTCA
+>74
+CCCCATAGTCTTAATAACCGACACCGAGACGCTACATGGCGTCGGTGCCCGCGTGCGGA
+>4
+TGTAACCTGTGTGGGGTCGGTCCTGGGGAAACTTTGGGTTTCATTACGGGGTGTCCATC
+>65
+TCGATAGGTAAAGGAGTAAGCGTCCGACTCCCTCTTACTTGGCGCGACCAATATTCATT
+>6
+ATAGAAATGAGGAGGATTCACAGACACGTCAGTCACCATCAAACGTCTAAGTAATCATG
+>44
+CCAACCATGGTAGGTTAGGAAAGCCGCCAAATAAGTTCTTATACGCCACTCGACTTGGC
+>3
+TCAACTCTACCCAAGCTCCTTGCATCTCGGTACCCCCCCTTGTAACCTGTGTGGGGTCG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/random-20-a.fa.part	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,198 @@
+>35	2
+CGCAGGCTGGATTCTAGAGGCAGAGGTGAGCTATAAGATATTGCATACGTTGAGCCAGC
+>16	2
+CGGAAGCCCAATGAGTTGTCAGAGTCACCTCCACCCCGGGCCCTGTTAGCTACGTCCGT
+>46	2
+GGTCGTGTTGGGTTAACAAAGGATCCCTGACTCGATCCAGCTGGGTAGGGTAACTATGT
+>40	2
+GGCTGAAGGAGCGGGCGTACGTGTTTACGGCATGATGGCCGGTGATTATGGGGGACGGG
+>33	2
+GCAGCGGCTTTGAATGCCGAATATATAACAGCGACGGGGTTCAATAAGCTGCACATGCG
+>98	2
+ACCAGATGCATAGCCCAACAGCTGAGACATTCCCAGCTCGCGAACCAAGACGTGAGAGC
+>17	2
+CCCTGTTAGCTACGTCCGTCTAAGGATATTAACATAGTTGCGACTGCGTCCTGTGCTCA
+>89	2
+GCGAGATACTAGCAAAGGTTCATCAACAGCTACACCCGACGAACCCCGAGAAATTGGGA
+>30	2
+GTTATGGTCCAGGATGAATGCGCGTACCGGGCGCCTATCACTCCTCTTGTCATTCAGAA
+>82	2
+ATGCACTATATTTAAGAGGTCTAGAGTGTAAAAAGTGTACCCTTCGGGGTGGAGCTGTT
+>60	2
+GTTTTTGTCATCGTGCATAAAGCGGGACAGAGTTCAACGGTATTCGAATGCACACCCTA
+>83	2
+CCTTCGGGGTGGAGCTGTTAATGAACTCAAGTGGCGATGGAGGCTAAAACGATACGTTG
+>12	2
+AGCCAATTGTAACCATATGGTATCCAGTTTCCGTAGCAGCAATGCGCGACGGGCAATCG
+>85	2
+CGTGATATGATTACTAAAGGGGCCCGCAAAAACCCATTCACTGAGGGCTCTGTCCGTAC
+>2	2
+CCCGTGGGGCGGGCTAATTTTAAAGGCAGGTTGCTACACGTCAACTCTACCCAAGCTCC
+>45	2
+ATACGCCACTCGACTTGGCTCGCCCTCGATCTAAAATAGCGGTCGTGTTGGGTTAACAA
+>11	2
+GCAGCAGACCAACATCCAACACTTTTCACAAGAGGCTGACAGCCAATTGTAACCATATG
+>39	2
+CAATTGACTTCCATGTGGGTCGGCTGTCAAGTCTAAACCGGGCTGAAGGAGCGGGCGTA
+>26	2
+AACATCTTAACCTCTGATCCCAACATGAGGGACATGAGTTTTCAAAGTAACGATGCGCA
+>75	2
+GTCGGTGCCCGCGTGCGGAGCAGTCTTGATCCGGCGCGCTCTTACCTATGGTCGGCACG
+>81	2
+GGCTACTGGTTGATAAGCGTACGTAAAAGGCGAGTCTTACATGCACTATATTTAAGAGG
+>97	2
+ATTAGTGTGACTAGCCGAGTGCCCCAGCGTTTATCCAATGACCAGATGCATAGCCCAAC
+>13	2
+AATGCGCGACGGGCAATCGCGTCTGCGTTGATCGTCGCCCCTATTGTCGCTCCCTTAGT
+>92	2
+ATCAGGGCAAATTTGCTCGTGACTAAATGGTAATACTACCCGGGACAGTAAACTTTTGG
+>56	2
+AGATCTGCTTGGGTGTATCCCCATTCAGAGATACCAGATCTAAGCGACCATCAGAAACA
+>61	2
+TATTCGAATGCACACCCTAACATACTGGAAGATTCACTCTATATACCGGGAACTACTAA
+>96	2
+ATTAGACCGCTATCAACTCTTGCGAGGAAGGTCTGGGCCTATTAGTGTGACTAGCCGAG
+>31	2
+CTCCTCTTGTCATTCAGAAGGAATTTGATTAATTACCTGGGCTGACTCGCGCCCCCTGC
+>29	2
+TGGAAGCGCCCTCCGCTCAGGCGTTTTAGTAGATCCCAGTGTTATGGTCCAGGATGAAT
+>54	2
+TGGATGAGGTCCTTAAGGCCTAATTGACCAATCGCCCCAAGATTGGTGGTGAATGACTC
+>0	2
+TAGTGATCAGCGGCTAGTGTCGCCCCTCTTAGCACCTTGCGATCATCGAATCGGGCTGT
+>90	2
+GAACCCCGAGAAATTGGGAAGCCTGGAGGCAGTACAGTCATCCAGTCTGCTGCTCAAAG
+>34	2
+TCAATAAGCTGCACATGCGTGGTTGTGGCACGATCAGTTCCGCAGGCTGGATTCTAGAG
+>43	2
+AGGACTCGACGTCCGCCCCATGCTTGAGAGAAGGTTTCGGCCAACCATGGTAGGTTAGG
+>8	2
+ACACACAAGGCCAGACACCAACTTGGCCGTGGAATTTATCAACACTTCTGAGACGAAGG
+>37	2
+TGTGCGCTGTGAGATACAACTATAGGCACCGGGTTGCTGGCTAATAACCATTTAGAGTC
+>51	2
+ACACAATGGACGCGTTAAGGAGAACCGGTCGCAACCAGGTTGAAAATGCCTGATATACG
+>32	2
+GCTGACTCGCGCCCCCTGCAGGCTGCTATGATTGAGTGCGGCAGCGGCTTTGAATGCCG
+>78	2
+TCTGGGGCGAGATCCCCTCTGCTCACTTTCTTGTAGTAAATACACCGAAGGGGCGAACC
+>18	2
+CGACTGCGTCCTGTGCTCAGTTCGTGACGCCGAACTCAAGGACGCGGTACGAAGAACTG
+>36	2
+TTGCATACGTTGAGCCAGCGCCGCCCGTATACACAGGGTCTGTGCGCTGTGAGATACAA
+>53	2
+ATATAAGTTTTTTAGATGTAAAAAATTTTTTATGGCGGCCTGGATGAGGTCCTTAAGGC
+>24	2
+AAGAAACAGGCTAGGTCTTCCATGCAATGGTTCTCACAGTGTAGTCGCGCATCAACTCC
+>7	2
+AAACGTCTAAGTAATCATGCGACCGGCGCCTCGATTGGACACACACAAGGCCAGACACC
+>9	2
+AACACTTCTGAGACGAAGGTCATTTACGATTGGGACACTTTCTCGAACTCCGGTTAATT
+>47	2
+CTGGGTAGGGTAACTATGTAGCCATCGCTCAGTGGATTCTTCCGGGATAGGGTGTGCGA
+>62	2
+ATATACCGGGAACTACTAAAATTTTGGGCTACTCTATGCTTACAGCCCAACATGCGCAA
+>79	2
+TACACCGAAGGGGCGAACCCTGTCTACATTCGCAAATGCATCCTACCTGAGAGGCTTCG
+>48	2
+TCCGGGATAGGGTGTGCGAATGTGCCGGGCATTCAGCTCCTTAGAGACGAGTTACGAGC
+>66	2
+GGCGCGACCAATATTCATTTGATGAGAATTGAAATCGACTGAATCACGGGATTTATACA
+>25	2
+GTAGTCGCGCATCAACTCCGCCAGTTTTATCGAAGCGCCCAACATCTTAACCTCTGATC
+>5	2
+TCATTACGGGGTGTCCATCTAGAGAAAGTGGGTTTCCCTTATAGAAATGAGGAGGATTC
+>72	2
+ATAAAAAACGACTTCTAAAGCGACACTGGTTTTATCCTTCCCTGTTTTCCTCGCCCCAT
+>76	2
+CTTACCTATGGTCGGCACGATTCCATTGGCGGATATAGGATTGATTACGTGTGTTTACT
+>69	2
+GCAGCGAGGTATTTAAACTGTTCAATCGGCGCAACCGAAAATCTGCTACCGTGGTTGCT
+>87	2
+CAGTATACGCCCGTTGAGAAACAGGTGGTGGCGCAGTGTCGATTACTTCGTAATAATTT
+>27	2
+TTCAAAGTAACGATGCGCAGATTGAATAATGCCATATCTGCGCGAGAGGTTTCAGGTAC
+>77	2
+TTGATTACGTGTGTTTACTATACCGGTAGAAGCCTTCAGTTCTGGGGCGAGATCCCCTC
+>95	2
+TACGTGTGGCATCGTTGCACCCTAATTCGCATTATTAAGTATTAGACCGCTATCAACTC
+>63	2
+TACAGCCCAACATGCGCAACAACTATAAGCTGCTGCTGACAGATCCGTTTGTTCCGGAC
+>38	2
+CTAATAACCATTTAGAGTCGCCCGCGGTGATGAGTAATCGCAATTGACTTCCATGTGGG
+>20	2
+GTGCCTACCGTACCTGTCGAGCCAGTGCGATCAGTAAAACTACCGATTCGTGGCCTCCC
+>88	2
+GATTACTTCGTAATAATTTGAGGGTGCTGCCGCGTGTTCCGCGAGATACTAGCAAAGGT
+>49	2
+TTAGAGACGAGTTACGAGCCACTCTTGGATCGTCATGCATACCTCGCAGATCGGCAGAG
+>91	2
+TCCAGTCTGCTGCTCAAAGTCCATCTACATGTAAAGAACCATCAGGGCAAATTTGCTCG
+>86	2
+CTGAGGGCTCTGTCCGTACGTGTACTATAGATCCTTGCTCCAGTATACGCCCGTTGAGA
+>42	2
+CATATTTCAGGCGTGCGCCAACTTACGATTCTTGAATCCAAGGACTCGACGTCCGCCCC
+>70	2
+ATCTGCTACCGTGGTTGCTTCGACCATGGTAAACTGAGTAAGCCCTTATGAGTTGCGGG
+>19	2
+GACGCGGTACGAAGAACTGCTCCAGCAACAGCATTCCTTGGTGCCTACCGTACCTGTCG
+>84	2
+AGGCTAAAACGATACGTTGTATACTAAGAACTGTCTACATCGTGATATGATTACTAAAG
+>52	2
+TGAAAATGCCTGATATACGAAGATTAAGCGGCTTTGGATCATATAAGTTTTTTAGATGT
+>71	2
+AGCCCTTATGAGTTGCGGGTCGTGCTGTTAGACTGAACACATAAAAAACGACTTCTAAA
+>93	2
+CGGGACAGTAAACTTTTGGTGATGCCAGCACGACCAGCGCAGGGTCAAGAAAACTATTA
+>58	2
+TCGTGGTACACCCGGAGTCTCGAAAGGAGCTTGCAAAGCTTTTCAGCATGGGTCGCATT
+>22	2
+TTCATTCCCCTGTAACGTTTCGAACTCAACTTGCTTGCCCGACATATGGCGGTACGCGG
+>50	2
+ACCTCGCAGATCGGCAGAGAACGGTTTGGTCTGTTTGCGTACACAATGGACGCGTTAAG
+>21	2
+TACCGATTCGTGGCCTCCCGTTCGTCGCAATGAACGGCTTTTCATTCCCCTGTAACGTT
+>73	2
+CCTGTTTTCCTCGCCCCATGCAATGGTAACTAATATACCGCCCCATAGTCTTAATAACC
+>68	2
+CTGTCCCAACGGTAACAATGGAGGCACTATACCGACGCTCGCAGCGAGGTATTTAAACT
+>23	2
+GACATATGGCGGTACGCGGGCTCAGCGCTCCGCCAGTAAGAAGAAACAGGCTAGGTCTT
+>94	2
+AGGGTCAAGAAAACTATTAATTTAAGCGCTGTTTAGTAACTACGTGTGGCATCGTTGCA
+>10	2
+TCTCGAACTCCGGTTAATTTGCAATCCGGGGGTTTGCTCAGCAGCAGACCAACATCCAA
+>41	2
+GGTGATTATGGGGGACGGGTATAGTACTAATAGTTTTGGGCATATTTCAGGCGTGCGCC
+>80	2
+TCCTACCTGAGAGGCTTCGACTAAAGAATGCGGGTATACTGGCTACTGGTTGATAAGCG
+>64	2
+AGATCCGTTTGTTCCGGACGGTCGTCGTACCCACCCCTTGTCGATAGGTAAAGGAGTAA
+>57	2
+TAAGCGACCATCAGAAACACAGCATCAGCTTACCAGCCTTTCGTGGTACACCCGGAGTC
+>1	2
+GATCATCGAATCGGGCTGTCGCCAAAGGCCGACCAAGGTTCCCGTGGGGCGGGCTAATT
+>55	2
+GATTGGTGGTGAATGACTCACAAAATGCTCATAGAATATTAGATCTGCTTGGGTGTATC
+>67	2
+GAATCACGGGATTTATACATCATTTATAGCTAAATTACACCTGTCCCAACGGTAACAAT
+>14	2
+CTATTGTCGCTCCCTTAGTTGTTGGGCGTAGTCCGCACCTAGAGTCCAACCAGGCCTCG
+>15	2
+AGAGTCCAACCAGGCCTCGACAATCCTTTGTCCTGTCCCCCGGAAGCCCAATGAGTTGT
+>59	2
+TTTCAGCATGGGTCGCATTCCTACCTAAGGCTAGGGGCATGTTTTTGTCATCGTGCATA
+>28	2
+CGCGAGAGGTTTCAGGTACCTATCGGGACAGACTTGTTTCTGGAAGCGCCCTCCGCTCA
+>74	2
+CCCCATAGTCTTAATAACCGACACCGAGACGCTACATGGCGTCGGTGCCCGCGTGCGGA
+>4	2
+TGTAACCTGTGTGGGGTCGGTCCTGGGGAAACTTTGGGTTTCATTACGGGGTGTCCATC
+>65	2
+TCGATAGGTAAAGGAGTAAGCGTCCGACTCCCTCTTACTTGGCGCGACCAATATTCATT
+>6	2
+ATAGAAATGAGGAGGATTCACAGACACGTCAGTCACCATCAAACGTCTAAGTAATCATG
+>44	2
+CCAACCATGGTAGGTTAGGAAAGCCGCCAAATAAGTTCTTATACGCCACTCGACTTGGC
+>3	2
+TCAACTCTACCCAAGCTCCTTGCATCTCGGTACCCCCCCTTGTAACCTGTGTGGGGTCG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-abund-read-2.fa	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,2002 @@
+>895:1:37:17593:9954/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
+>seq
+GGTTGACGGGGCTCAGGG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-abund-read-2.fa.below	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,2 @@
+>895:1:37:17593:9954/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
Binary file test-data/test-abund-read-2.large.oxlicg has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-abund-read-2.large.oxlicg.info	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,4 @@
+through test-abund-read-2.fa
+Total number of unique k-mers: 83
+fp rate estimated to be 0.000
+
Binary file test-data/test-abund-read-2.oxlicg has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-abund-read-2.oxlicg.info	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,4 @@
+through /home/mcrusoe/khmer/tests/test-data/test-abund-read-2.fa
+Total number of unique k-mers: 98
+fp rate estimated to be 0.000
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-abund-read-paired.fa	Sat Oct 17 04:02:33 2015 -0400
@@ -0,0 +1,12 @@
+>895:1:37:17593:9954/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>895:1:37:17593:9954/2
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>895:1:37:17593:9954 1::FOO
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>895:1:37:17593:9954 2::FOO
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>895:1:37:17593:9954/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>895:1:37:17593:9954/2
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG