changeset 0:4b34f2b5c14e draft

Uploaded
author drosofff
date Mon, 13 Apr 2015 18:17:08 -0400
parents
children c1d17d173128
files retrieve_fasta_from_NCBI.py retrieve_fasta_from_NCBI.xml test-data/output.fa
diffstat 3 files changed, 885 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/retrieve_fasta_from_NCBI.py	Mon Apr 13 18:17:08 2015 -0400
@@ -0,0 +1,251 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Retrieves all the nucleotide or protein sequences matching an NCBI Entrez query
+and returns them as a single multi-FASTA file.
+
+Entrez Database  UID common name  E-utility Database Name
+Nucleotide       GI number        nuccore
+Protein          GI number        protein
+
+Retrieve strategy:
+
+esearch to get total number of UIDs (count)
+esearch to get UIDs in batches
+loop until the end of the UIDs list:
+  epost to put a batch of UIDs in the history server
+  efetch to retrieve info from previous post
+
+retmax of efetch is 1/10 of declared value from NCBI
+
+queries are 1 sec delayed, to satisfy NCBI guidelines (more than what they request)
+
+
+python retrieve_fasta_from_NCBI.py -i 1638 -o test.out -d protein
+python retrieve_fasta_from_NCBI.py -i 327045 -o test.out -d nuccore # 556468 UIDs
+"""
+
+import logging
+import optparse
+import time
+import urllib
+import urllib2
+import re
+class Eutils:
+
+    def __init__(self, options, logger):
+        self.logger = logger
+        self.base = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
+        self.query_string = options.query_string
+        self.dbname = options.dbname
+        if options.outname:
+            self.outname = options.outname
+        else:
+            self.outname = 'NCBI_download' + '.' + self.dbname + '.fasta'
+        self.ids = []
+        self.retmax_esearch = 100000
+        self.retmax_efetch = 1000
+        self.count = 0
+        self.webenv = ""
+        self.query_key = ""
+
+    def retrieve(self):
+        """ """
+        self.get_count_value()
+        self.get_uids_list()
+        self.get_sequences()
+
+    def get_count_value(self):
+        """
+        just to retrieve Count (number of UIDs)
+        Total number of UIDs from the retrieved set to be shown in the XML
+        output (default=20). By default, ESearch only includes the first 20
+        UIDs retrieved in the XML output. If usehistory is set to 'y',
+        the remainder of the retrieved set will be stored on the History server;
+
+        http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch
+        """
+        self.logger.info("retrieving data from %s" % self.base)
+        self.logger.info("for Query: %s and database: %s" %
+                         (self.query_string, self.dbname))
+        querylog = self.esearch(self.dbname, self.query_string, '', '', "count")
+        self.logger.debug("Query response:")
+        for line in querylog:
+            self.logger.debug(line.rstrip())
+            if '</Count>' in line:
+                self.count = int(line[line.find('<Count>')+len('<Count>') : line.find('</Count>')])
+        self.logger.info("Founded %d UIDs" % self.count)
+
+    def get_uids_list(self):
+        """
+        Increasing retmax allows more of the retrieved UIDs to be included in the XML output,
+        up to a maximum of 100,000 records.
+        from http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
+        """
+        retmax = self.retmax_esearch
+        if (self.count > retmax):
+            num_batches = (self.count / retmax) + 1
+        else:
+            num_batches = 1
+        self.logger.info("Batch size for esearch action: %d UIDs" % retmax)
+        self.logger.info("Number of batches for esearch action: %d " % num_batches)
+        for n in range(num_batches):
+            querylog = self.esearch(self.dbname, self.query_string, n*retmax, retmax, '')
+            for line in querylog:
+                if '<Id>' in line and '</Id>' in line:
+                    uid = (line[line.find('<Id>')+len('<Id>') : line.find('</Id>')])
+                    self.ids.append(uid)
+            self.logger.info("Retrieved %d UIDs" % len(self.ids))
+
+    def esearch(self, db, term, retstart, retmax, rettype):
+        url = self.base + "esearch.fcgi"
+        self.logger.debug("url: %s" % url)
+        values = {'db': db,
+                  'term': term,
+                  'rettype': rettype,
+                  'retstart': retstart,
+                  'retmax': retmax}
+        data = urllib.urlencode(values)
+        self.logger.debug("data: %s" % str(data))
+        req = urllib2.Request(url, data)
+        response = urllib2.urlopen(req)
+        querylog = response.readlines()
+        time.sleep(1)
+        return querylog
+
+    def epost(self, db, ids):
+        url = self.base + "epost.fcgi"
+        self.logger.debug("url_epost: %s" % url)
+        values = {'db': db,
+                  'id': ids}
+        data = urllib.urlencode(values)
+        req = urllib2.Request(url, data)
+        #self.logger.debug("data: %s" % str(data))
+        req = urllib2.Request(url, data)
+        response = urllib2.urlopen(req)
+        querylog = response.readlines()
+        self.logger.debug("query response:")
+        for line in querylog:
+            self.logger.debug(line.rstrip())
+            if '</QueryKey>' in line:
+                self.query_key = str(line[line.find('<QueryKey>')+len('<QueryKey>'):line.find('</QueryKey>')])
+            if '</WebEnv>' in line:
+                self.webenv = str(line[line.find('<WebEnv>')+len('<WebEnv>'):line.find('</WebEnv>')])
+            self.logger.debug("*** epost action ***")
+            self.logger.debug("query_key: %s" % self.query_key)
+            self.logger.debug("webenv: %s" % self.webenv)
+        time.sleep(1)
+
+    def efetch(self, db, query_key, webenv):
+        url = self.base + "efetch.fcgi"
+        self.logger.debug("url_efetch: %s" % url)
+        values = {'db': db,
+                  'query_key': query_key,
+                  'webenv': webenv,
+                  'rettype': "fasta",
+                  'retmode': "text"}
+        data = urllib.urlencode(values)
+        req = urllib2.Request(url, data)
+        self.logger.debug("data: %s" % str(data))
+        req = urllib2.Request(url, data)
+        response = urllib2.urlopen(req)
+        fasta = response.read()
+        if self.dbname != "pubmed":
+            assert fasta.startswith(">"), fasta
+        fasta = self.sanitiser(self.dbname, fasta) #
+        time.sleep(1)
+        return fasta
+        
+    def sanitiser(self, db, fastaseq):
+		if db not in "nuccore protein" : return fastaseq
+		regex = re.compile(r"[ACDEFGHIKLMNPQRSTVWYBZ]{49,}")
+		sane_seqlist = []
+		seqlist = fastaseq.split("\n\n")
+		for seq in seqlist[:-1]:
+			fastalines = seq.split("\n")
+			if len(fastalines) < 2:
+				self.logger.info("Empty sequence for %s" % ("|".join(fastalines[0].split("|")[:4]) ) )
+				self.logger.info("%s download is skipped" % ("|".join(fastalines[0].split("|")[:4]) ) )
+				continue
+			if db == "nuccore":
+				badnuc = 0
+				for nucleotide in fastalines[1]:
+					if nucleotide not in "ATGC":
+						badnuc += 1
+				if float(badnuc)/len(fastalines[1]) > 0.4:
+					self.logger.info("%s ambiguous nucleotides in %s or download interrupted at this offset | %s" % ( float(badnuc)/len(fastalines[1]), "|".join(fastalines[0].split("|")[:4]), fastalines[1]) )
+					self.logger.info("%s download is skipped" % (fastalines[0].split("|")[:4]) )
+					continue
+				fastalines[0] = fastalines[0].replace(" ","_")[:100] # remove spaces and trim the header to 100 chars
+				cleanseq = "\n".join(fastalines)
+				sane_seqlist.append(cleanseq)
+			elif db == "protein":
+				fastalines[0] = fastalines[0][0:100]
+				fastalines[0] = fastalines[0].replace(" ", "_")
+				fastalines[0] = fastalines[0].replace("[", "_")
+				fastalines[0] = fastalines[0].replace("]", "_")
+				fastalines[0] = fastalines[0].replace("=", "_")
+				fastalines[0] = fastalines[0].rstrip("_") # because blast makedb doesn't like it 
+				fastalines[0] = re.sub(regex, "_", fastalines[0])
+				cleanseq = "\n".join(fastalines)
+				sane_seqlist.append(cleanseq)
+#		sane_seqlist[-1] = sane_seqlist[-1] + "\n" # remove to have sequence blocks not separated by two \n
+		return "\n".join(sane_seqlist)
+
+    def get_sequences(self):
+        """
+        Total number of records from the input set to be retrieved, up to a maximum
+        of 10,000. Optionally, for a large set the value of retstart can be iterated
+        while holding retmax constant, thereby downloading the entire set in batches
+        of size retmax.
+        
+        http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch
+        
+        """
+        batch_size = self.retmax_efetch
+        count = self.count
+        uids_list = self.ids
+        self.logger.info("Batch size for efetch action: %d" % batch_size)
+        self.logger.info("Number of batches for efetch action: %d" % ((count / batch_size) + 1))
+        with open(self.outname, 'w') as out:
+            for start in range(0, count, batch_size):
+                end = min(count, start+batch_size)
+                batch = uids_list[start:end]
+                self.epost(self.dbname, ",".join(batch))
+                self.logger.info("retrieving batch %d" % ((start / batch_size) + 1))
+                mfasta = self.efetch(self.dbname, self.query_key, self.webenv)
+                out.write(mfasta + '\n')
+
+
+# Layout of a log record: timestamp|level name left-justified on 8 columns|message.
+LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
+# Date format handed to logging.basicConfig as 'datefmt'.
+LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
+# Values accepted by --loglevel; each one is looked up as an attribute of the logging module.
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+
+
+def __main__():
+    """ main function """
+    parser = optparse.OptionParser(description='Retrieve data from NCBI')
+    parser.add_option('-i', dest='query_string', help='NCBI Query String')
+    parser.add_option('-o', dest='outname', help='output file name')
+    parser.add_option('-l', '--logfile', help='log file (default=stderr)')
+    parser.add_option('--loglevel', choices=LOG_LEVELS, default='INFO', help='logging level (default: INFO)')
+    parser.add_option('-d', dest='dbname', help='database type')
+    (options, args) = parser.parse_args()
+    if len(args) > 0:
+        parser.error('Wrong number of arguments')
+    
+    log_level = getattr(logging, options.loglevel)
+    kwargs = {'format': LOG_FORMAT,
+              'datefmt': LOG_DATEFMT,
+              'level': log_level}
+    if options.logfile:
+        kwargs['filename'] = options.logfile
+    logging.basicConfig(**kwargs)
+    logger = logging.getLogger('data_from_NCBI')
+    
+    E = Eutils(options, logger)
+    E.retrieve()
+
+
+# Run the command-line entry point only when the file is executed as a script.
+if __name__ == "__main__":
+    __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/retrieve_fasta_from_NCBI.xml	Mon Apr 13 18:17:08 2015 -0400
@@ -0,0 +1,57 @@
+<tool id="retrieve_fasta_from_NCBI" name="Retrieve FASTA from NCBI" version="0.9.2">
+  <description></description>
+  <command interpreter="python">retrieve_fasta_from_NCBI.py -i "$queryString" -d $dbname -o $outfilename -l $logfile </command>
+
+  <inputs>
+    <param name="queryString" type="text" size="5x80" area="True" value="txid10239[orgn] NOT txid131567[orgn] AND complete NOT partial[title] NOT phage[title]" label="Query to NCBI in entrez format" help="exemple:'Drosophila melanogaster[Organism] AND Gcn5[Title]">
+      <sanitizer>
+        <valid initial="string.printable">
+          <remove value="&quot;"/>
+          <remove value="\"/>
+        </valid>
+        <mapping initial="none">
+          <add source="&quot;" target="\&quot;"/>
+          <add source="\" target="\\"/>
+        </mapping>
+      </sanitizer>
+    </param>
+    <param name="dbname" type="select" label="NCBI database">
+      <option value="nuccore">Nucleotide</option>
+      <option value="protein">Protein</option>
+<!--      <option value="pubmed">Pubmed (experimental)</option> -->
+    </param>
+  </inputs>
+  <outputs>
+    <data name="outfilename" format="fasta" label="${tool.name} on ${on_string}: queryString${queryString.value}.${dbname.value_label}.fasta" />
+    <data format="txt" name="logfile" label="${tool.name} on ${on_string}: log"/>
+  </outputs>
+  <tests>
+    <test>
+        <param name="queryString" value="DCV AND virus" />
+        <param name="dbname" value="nuccore" />
+        <output name="outfilename" ftype="fasta" file="output.fa" />
+        <!--  <output name="logfile" ftype="txt" file="log.txt" />  log.txt changes with timestamp. removed to pass the  test -->
+    </test>
+  </tests>
+  <help>
+**What it does**
+
+This tool retrieves nucleotide/peptide sequences from the corresponding NCBI database for a given entrez query.
+
+The tool is preset with the query "txid10239[orgn] NOT txid131567[orgn] AND complete NOT partial[title] NOT phage[title]", which is intended for use with metaVisitor.
+
+See `Entrez help`_ for explanation of query formats
+
+**Acknowledgments**
+
+This Galaxy tool has been adapted from the galaxy tool `get_fasta_from_taxon`_.
+
+It is Copyright © 2014-2015 `CNRS and University Pierre et Marie Curie`_ and is released under the `MIT license`_.
+
+.. _Entrez help: http://www.ncbi.nlm.nih.gov/books/NBK3837/#EntrezHelp.Entrez_Searching_Options
+.. _get_fasta_from_taxon: https://toolshed.g2.bx.psu.edu/view/crs4/get_fasta_from_taxon
+.. _CNRS and University Pierre et Marie Curie: http://www.ibps.upmc.fr/en
+.. _MIT license: http://opensource.org/licenses/MIT
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.fa	Mon Apr 13 18:17:08 2015 -0400
@@ -0,0 +1,577 @@
+>gi|92133599|dbj|BD295732.1|_WO_2003033719-A/3:_Novel_tertiary_structure_having_ability_to_accelerat
+GTTAAGATGTGATCTTGCTTCCTTATACAATTTTGAGAGGTTAATAAGAAGGAAGTAGTGCTATCTTAAT
+AATTAGGTTAACTATTTAGTTTTACTGTTCAGGATGCCTATTGGCAGCCCCATAATATCCAGGACACCCT
+CTCTGCTTCTTATATGATTAGGTTGTCATTTAGAATAAGAAAATAACCT
+>gi|28414844|dbj|BD173513.1|_WO_2002061080-A/3:_Novel_tertiary_structure_having_ability_to_accelerat
+GTTAAGATGTGATCTTGCTTCCTTATACAATTTTGAGAGGTTAATAAGAAGGAAGTAGTGCTATCTTAAT
+AATTAGGTTAACTATTTAGTTTTACTGTTCAGGATGCCTATTGGCAGCCCCATAATATCCAGGACACCCT
+CTCTGCTTCTTATATGATTAGGTTGTCATTTAGAATAAGAAAATAACCT
+>gi|9629650|ref|NC_001834.1|_Drosophila_C_virus,_complete_genome
+TTTATATCGTGTGTACATATAAATATGTACACACGGCTTTTAGGTAGAATATTGTTTTCAATGTTGATTT
+TAAAGGTAACTTTGGTTATTATGCTTTACGGTTTTCATTGTTGATGGTATTTGTGGCCTGCGGTCCCTAA
+TTGTTGAATTATTTATTCTGATACGTTGTTTTCATTGTTGATGGTAAGGATTCTTATTTTGAAGTGGTTT
+TTCAGAAGATAACTCTAAATATGAATTATGCCTTATTGTTTTCAATGTTGATGGCCTTCGTTTAAATACT
+CTTTGTTAATGACGGTAATCAAAGATTACATCTCAAACTTAGATTAATATTTTTAAGTAGGGTATACTGA
+GTTAGTCCTCTCTCTTTACTGATTTTGATATCTGGTAATTGACTTCGAAGAAAGATGCGTCTTTTGGATT
+TGTAATGACTGGGCCTTAAGTTCATAGGTGTTATTACATGGAGGAACACATTACTTTGGTTGATGATGAT
+GTTTTGATGATGACTTTCAATGTATGTGCTTATGTTAAGCCTGACATAAGAACTTACTAGTTTGCATAAT
+GCAAAGGGTTAGTATATGATTTTTAGTATGTGGATTTTGACACTGCCTTTGATTAGGATGTGTGAATGAT
+TTTGAAACATATTAAGATGTTTATACGAGCGTGTTGTTTACTATTTTCAGGATATGTGGAAGCGGTTGTG
+TATGATCTATACGCACATTTAGTTCCCAGAGGGCGTTGTCGTCTCCCCCTAAGCAAGGGAGAAACACGTG
+GCACATGATCTTGCGCTTAACGATAAAAATGGAATCTGATAAAAGTATGGCCTGTTTAAATAGAATTTTG
+ATGAATAAGATGATGTTTGTGGAAGATAAGATCTCTACCCTTAAGATGGTTGCTGATTATTATCAAAAAG
+AAGTAAAGTATGATTTTGATGCAGTTGAATCTCCCCGTGAGGCACCTGTATTTAGATGTACTTGTCGATT
+CCTTGGTTATACCATTATGACTCAAGGCATCGGTAAGAAGAATCCGAAACAGGAAGCTGCACGTCAGATG
+TTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTGGACCCGTTCAATCGCGCCCCGTGTATTATCGTT
+ACAACGACCCTAGATATACACGGTTGGAAAAAGCTATTGAACGTCGAGACGATAAAATTAAAACATTAAT
+TAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATTTATTCCCAAGGAATGTTTGATAAATTAACTAAA
+CAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTGAACAGATGAATGGAAATTTGACTCGTATTTGTG
+ATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAATATTCAAGCCACTGTGATTGATACAACAGACAA
+ATATGTTTCTTTAAAAGAGGATATTATGAAGATTGTTTTAGTGATATTGCTTGTTCGTCTTTTAATGGTT
+TGGAAGAAGTATCGTGCTTCTCTGTGTGTTATTTTAATCTTTATTTTTAAATTTTATGGATTCGATCAAA
+AGTTGATTGATTTAATTATGGATTTGAAGAATAAAATATTTTCACAGGGTGCATTGGAAGATACAGTTGA
+GGAGGTTGTATATCATCCTTGGTTCCATACGTGTGGAAAAATCATCTTTGCGGTTATGGCTTTCTTAACA
+ATTAAGAAAATTCCTGGTAAACAGGATTGGGATAGTTACATAACACGTTTAGATCGTATCCCAAAATCTA
+TTGAGGGAGCTAAAAAGATCACTGATTACTGTTCAGAATATTTTAATATTGCTAATGATCAGATCAAGAT
+GATGGTTCTTGGAAAGACTAAAGAAGAATTGCAACGTGCTAATGGACTATATGGAGAAATTCAAGCTTGG
+GCTCAAGAGGTTCGCCAGTATTTGGAATTGGATCAACGGAATAAAATTGATCTAGATACTGAAACCGCAA
+ATCGTGTTGAACAACTTTGGATAAAGGGCTTGAAATTCAAGAGTGAACCCCTTTTGAGTAAGGAAATGTC
+AGCTTTAGTTCATACAACTCTTTTACCAGCTAAGCAATTGTACGAGTATGTATCGTGTTCTCCTGTTAAA
+GGGGGAGGACCACGTATGCGTCCAATTTGTTTATGGTTGGTAGGTGAATCAGGAGTTGGTAAGACTGAAA
+TGGTATATCCATTGTGCATTGATGTTCTTCGGGAAATGGGGATGATTAAGAAAGATGATTTTCATCATCA
+AGTTTATGGTCGTCAAGTTGAAACTGAATTCTGGGATGGTTATAAAGGACAGAAAATTGTCATTTATGAT
+GATGCATTTCAGAAGAAAGATGACAAAACAGCAGCTAACCCAGAAATTTTTGAGGTTATTCGCTCTTGCA
+ACACTTTTCCTCAGCATTTACATATGGCAGCTCTTCATGATAAAAATACTTTTTCTGCTGCTGAATTACT
+CTTATATACCACTAATGATTATAATGTTAAGCTGGAATCTATTACTTTTCCCGATGCTTTCTTTAATCGT
+ATGGGCGATATGGCTTATAAAGTTAGTCCTAAGAAAGAGTATGGTATTGAAACCGAGAAAGGGAATTCAG
+GTAAAACTTATTTAAAATTGGATAAGAGTAAATTGGACAAAACAAAAGCTATTGACCTTTCAGTGTATGA
+ATTCCAAAAAATTGTACGTGACGAGAAAAGTGATGCAGGTTGGATTGATTCTGGATCACCCTTGGACTAT
+GAAGATTTTGCTAAATTAGTGTGTTCAAAATGGAAAGAAGCGAAACAATCTTCAATGAATAAATTGAAAT
+TTTTGGAAGAATATGCTATTCGTGCTCAGGTTGGATCAGAAGAAAATTCTGAATATGGTGATTGTATAGA
+TTTTGTCGATGATATTGCCAAACGCTTACAAAAAGGTGAAACTCTTGAAGAAATAGAGTTTGATTATGCC
+TCAGATCCAGAGATGTTTACTCAATACTATCATTTTAAATCTACAATTAAACCGGCATCGCGTTGGCAGA
+AGTATAAGGATCGGATGGACATTTGTTTGAGCGACTGTAAGACTTATTTAGCAAAGAAATACGAAGAAAT
+TAAGAAAATTCTTGCCGAACATCCTATCTTGACGATTTTAGGAATGATAGGGGTTGCCTTATCTGCTCTG
+GCAATGTACTATTGGTTTTCTAAATCGTTGGATCCTGTAGAAGCCGAGGTTGCTCCTTCTGGTGACGCTA
+AAACAGTGCGCTTACCAAGGAAACTCGTTGAGATTGGTGCTTCTGGAGATGTTAAAACACAGAAGATTGT
+GAAACCCGTTGTAGAGACCGAATGGCATCGTAACAATAAAGGAGAGATTGAAATTTCTTGTGATGAATGT
+GGTATGCATAGGATGTCTGCATTTAACAATATGACAGATGAAGAATTTGATAACTGTACATATGAAGATT
+TGAATAAGGACCAGAAACGTGAACTTGCCCAGTGGTCTACTAAAGATTCTTGGTTAGGTCGATTCTTTTT
+GAGTCGAGATCGCAAGAATAAGGTTGGAATTTGGGCCGAAGTGGGACAATCAGGTGATGTTAAAACAAAT
+AAAGCTCAGATTAAACGTGTTGAAGCTGGAGCCGAAGAATTAGTTACTGTTGCTTTAACTCAAGGTTGTT
+CTGATGATGCTGCACACAATTTGATGATTGACGTTTTCCAAAAAAATACATATAGAATGTCATACTTCCG
+TGGAGACAAGCGTTATCAACTTGGAAATTGTACATTTGTTCGTGGTTGGTCTTTTATTATGCCATATCAT
+TTTGTACAGGCTGTGTTTGCGCGAAGATTGCCACCTAACACGATTATTTCTTTGTCCCAACAGATGTCTG
+AAGATTTAATGCAAATTCCATTATCACACTTCTTTTCTGCTGGTGTTGACAATTTTTATTTAACAGATAA
+TTGTGTGCGCTTGCCATTTAAAAATGGAGATTTTCGCGATTGTGTTATGGTTAATTTACATTCACGAATG
+TGCACGCCACATCGTGATTTGGTTCGGCATTTTATTTTAACTTCTGATCAAGGTAAATTGAAGGGATCTT
+TTAGTGGTGCAATGGCAACTTTCCATGTTAACAATATGGGTTTATATCGTGTTTATAATTGGCTAAATGC
+AGTTCGTCCTTGCGATAAAAAGATAGAAATTTTCCACCCTGAAGATGGTTTTGAGTATCCCGAAGAATCA
+TATATTCAACGTGACTGTTATGAATATAATGCACCGACTCGTACTGGGGATTGTGGATCTATTATTGGAT
+TGTATAATAAATATTTAGAAAGGAAAATCATCGGTATGCACATTGCTGGAAATGATGCAGAAGAGCATGG
+TTATGCGTGCCCTTTGACACAAGAGTGTCTTGAGACTGCTTTTTCTGCTTTAGTAAATAAAAATAAGAAG
+AATATTTCCTCACAATTTTATTATGAAATACCCAATATGGTTGATCCACTCGGTGATAGTAGTGTTCCTG
+AAGGTAAGTTTTACGCTTTAGGAAAGTCATCTATTCGTGTGGGACAGGCAGTTAATTCGTCCATAATTCC
+TTCTCGAATTTATGGGAAGTTGTCTGTTCCTACAATGAAACCAGCACTACTCAAGCCAACGATTCTGAAT
+AACAAAGTACATAATCCTTTATTGTCGGGACTTAAGAAATGTGGTGTAGACACTGCGGTCTTGAGTGATG
+ATGAAGTTTTGAGTGCTTCACAAGATGTTTGTCGTGTTATGTTGAACCAATATAATAAAAATTTGAATAA
+AACAAAGTATCAACGCATTTTAACATATGAGGAAGCTATTCGTGGAACCCAAGATGATGAATTTATGTGT
+GCTATTAATCGTACAACGTCACCAGGATTTCCTTATGCACAAATGAAAAGAAATGCTCCAGGTAAACAGC
+AATGGATGGGTTTTGGTGAAGAATTTGATTTTACAAGTAATTATGCACTAGCTTTGCGGAAAGATGTTGA
+ACAACTCATTGAAGATTGTGCTAGCGGAAAAATATCTAATGTCATTTTTGTAGATACATTGAAAGATGAG
+CGACGCGATATAGCTAAAGTAAATGTAGGTAAAACGCGTGTATTTTCTGCAGGTCCTCAACATTTTGTAG
+TTGCATTTCGCCAATATTTCCTACCTTTTGCTGCTTGGTTGATGCATAATCGCATTTCGAACGAAGTAGC
+AGTTGGCACTAATGTTTATTCATCTGATTGGGAACGTATTGCAAAACGTCTTAAAACAAAAGGTAGTCAC
+GTCATTGCGGGGGACTTTGGAAATTTCGATGGATCTTTAGTAGCACAAATTTTGTGGGCCATATTTTGGG
+AAATTTTTGTTGTATGGCTTAAGCAATTTATTGATATAGAGAATTCAGAAGGAAAACGTATTTTATGTAT
+CTGTCTTGGTTTGTGGTCACATTTAGTTCACTCTGTTCATATTTATGAAGATAATGTATATATGTGGACT
+CATTCTCAACCTTCTGGCAATCCTTTCACTGTTATTATTAATTGCTTGTATAATTCGATTATTATGCGAC
+TGTCATGGATTCGTGTGATGGAGAAATTTCAACCTAGACTTAAGTCCATGAAGTGGTTCAACGAATATGT
+CGCCTTGATAACATATGGTGACGACAATGTTTTAAACATTGATGCAAAGGTTGTGGAATGGTTTAATCAG
+ATTAACATTAGTGAGGTTATGACTGAAATGCGACATGAATATACGGACGAAGCTAAAACTGGTGATATTG
+TTAAATCTCGTAAATTAGAAGATATTTTCTTTTTGAAGAGAAAATTTCGTTTTAGCCCAGAATTACAACG
+CCATGTTGCTCCATTGAAGATCGAAGTTATTTATGAAATGTTGAATTGGTCTCGCCGCTCTATAGATCCA
+GATGAAATCTTGATGTCGAACATTGAAACGGCTTTTCGTGAAGTAGTTTACCACGGAAAAGAAGAATACG
+ATAAACTAAGGTCAGCGGTATTGGCGTTGAAGGTACCCCAGGAACTTCCTGAAAACCCTCAGATTTTGAC
+GTACAACCAATATTTGCACGATATTGAATATCTTGCGGACCCTTTGTACGACTTTTAGTTAAGATGTGAT
+CTTGCTTCCTTATACAATTTTGAGAGGTTAATAAGAAGGAAGTAGTGCTATCTTAATAATTAGGTTAACT
+ATTTAGTTTTACTGTTCAGGATGCCTATTGGCAGCCCCATAATATCCAGGACACCCTCTCTGCTTCTTAT
+ATGATTAGGTTGTCATTTAGAATAAGAAAATAACCTGCTAACTTTCAAACAAATAATAATAACATTGAAA
+ATGAAGATCGGAAAATTACTTCCGAGCAAAAAGAGATTGTACACTTTTCTAGTGAAGGAGTTACCCCTAG
+TACCACTGCGGTGCCTGATATCGTTAGTCTTTCAACAGATTATTTGTCTATGACTACTCGTGAAGATCGT
+ATCCACACGATTAAAGATTTTCTTTCTCGTCCAATTATAATTCAAACTGGTCTTTGGTCTTCCGCTACAA
+CTGCCGAAACTCAATTGTATACTGCTAATTTCCCTGAAGTGTTCATTTCTAATACTATGTATCAAGAAAA
+GTTGCGTGGGTTCGTGGGTTTGCGAGCAACTTTAGTCATTAAAGTGCAAGTGAATTCCCAACCTTTCCAG
+CAAGGACGATTGATGCTACAGTATTATCCGTATGCACAGTATATGCCTAACCGTGTTTCTTTGGTGAATT
+CCACTCTCCAAGGACGCTCTGGTTGTCCTCGAACAGATTTGGATTTGAGCGTTGGTACGGAAGTTGAAAT
+GCGAATTCCTTATGTGTCCCCTCATGTATATTACAATCTTATTACTGGACAAGGATCATTTGGCGCTATA
+TATTTGGTTGTATATAGCCAACTAAGAGATCAAGTTACAGGAACAGGTTCTGTTGAATATACTGTTTGGG
+CTCATTTGGAAGATGTAGATGTGCAATACCCGACCGGTGCAAACATTTTCACGGGTAGCTCTCCAAATTT
+TGCCTCTTTGGGTCAGAAAATGAGTGATGGAAAATTCACTGAAAAAGACTTGAGAGATATTTGGACTTCA
+AAAGCGTACAATAAACAACCAGACAAAATTTTCGCACAAGTGGCTTCTGAAATAACACAACTCAAAGAAT
+CAGGAACAATTAGTTCTGGAATTGGACAAGTTTCTGAAGGTCTTTCTACCATGTCTAAAATCCCTATACT
+CGGAAATATGTTTACAAAACCCGCCTGGATTTCAGCTCAAGTATCTAATATCTTCAAGATGCTTGGTTTT
+TCAAAACCCACTGTTCAAGGTCTTCCTTGTGAATCGAAACTGCGTGGTCAAGTTCGAATGGCGAATTTTG
+ATGGCGCTGATACATCACATAAATTGGCTTTGTCTGCCCAAAACGAAATTGAAACAAAATCTGGACTTTC
+TGGAACTTCTCCTGATGAAATGGATTTATCACACGTCCTTTCCATACCAAATTTTTGGGATCGTTTTACT
+TGGAACACAACCGATGCCACTAGTTCTATTTTATGGGATAATTATGTTACACCAATGAAAATTAAACCAT
+ATTCCTCTACAATATTAGATAGATTTAGATGCACTCATATGGGTTTTGTAGCCAACACACACGGTTATTG
+GTGTGGATCAATAGTTTATACTTTTAAATTTGTTAAGACTCAATTTCATTCTGGACGTTTACGCATTAGT
+TTTATTCCATTTTATTATAATACGACTATATCTGCAGGAGTTCCCGATGTTTCTCGTACCCAAAAAGTAA
+TCGTTGATCTGCGCACCTCTACAGAAGTCTCTTTCACTATTCCGTATGTGTCTTCACGACCTTGGATGTA
+CTGTATTCGTCCTGAAGCTTCGTGGCTTGGAACCGATAATGCTTTGATGTACAACGCCGTTACGGGTATA
+GTGAGAGTTGAGGTTCTTAACCAGTTGGTTGCCGCTAACAACGTGTTTCAATCTATAGACACTATTGTTG
+AAGTTAGTGGTGGTCCTGATTTAACTTTTGCAGCACCAATGGCTCCCTCTTATGTTCCTTATTCTGGAGG
+TTTTACTTTAGCAGATGATGCGGCAGCAAAGAAACAGCGTGAGGAGGAGTATGACAACAACATACCTCAA
+ACTATTTCTAATCGTGGAAAACGTGAGGTTGAAGATGCTCGTATTGTTGCGCAAGTAATGGGTGAAGATT
+TAGCTATTCAAAGAAACGATGCTCAACATGGTGTTCATCCAATGACTATAGACACTCATAAGATCGACTC
+AAATTGGTCTCCGGAAGCGCATTGTATTGGTGAAAAGATTATGTCTATTCGCCAATTGATTAAGCGTTTT
+GGCATGGCTTTGAACTCCTTGAATTTGATAAGTGATGCACCAAACACCTTGATAGCACCATTTTCAGTTC
+AGCACCCAACTCCTGTTGTTGCCCCTGCTGAACCCATGTCCCTTTTTGAATATTATTATTTCATTTATGG
+ATTTTGGAGAGGTGGCATGAGATTTAAACTTCAGGCAGTACGTACAAACTCAGCAGAAACATCAGTTAAA
+ACCGACACAACTTGGACTGTAAATTTGTGGAATTCTGTACAAGATTCTTTTAATTCTCTAATTAATGTAT
+TTAGTACTACTGATTACCCTATAAAATCCACAGGAGCACTTCCAGCCGGAACAAGCGGTTTTGGCAATTC
+GATGACGTATATAGATCCTGAGGTTGAAGGTTTTATGGAATTTGAGATTCCATATTATAATATCTCCCAT
+ATTTCTCCAGCTACAACCTATGTTCGTGGTACTGAATCTCCTATTACAATTAATAGTGTCTTGCGTGGAC
+ATTTGCCACCACAAATTGTGGCTGTTGCACCACAGGGCACTATTGCCACTACAGATGTAGTGAACGCTCA
+ATTTGCTCGTGCTCCTTCTGACGACTTTTCATTTATGTATCTCGTTGGTGTTCCACCACTTACCAACGTC
+GCTCGTCCCTAACTCCCTTACTATTCTGGATCCTTTAAAATTTATTAGGATAGACAAAAATTAACTCTAT
+ATTAGATAGTATTAGATTAAGTTTCTTTTTGGTTTTGGGTTTTATTCAGTAACTATCTGCCCTGCTTACA
+CGGGTATTATTTTTAATTCTTGTCCCTTCTGGACTCTTTTATTTTGTATTTTCAAAATTTTTACTAATTT
+TTAGTCAGAGTCCTTAGGGGCTACCAGGTTTTTCGCAATTTTCCTGCTTACTGACAGTAATTGCAATTTC
+GAATTAAAATAATAGTTGTTTTCT
+>gi|300871995|gb|GU983911.2|_Drosophila_C_virus_isolate_ZW122_polyprotein_gene,_partial_cds
+CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAATTGCGTCGACAAATCAAAAATAGGAAAATT
+TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTCCCCGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT
+TTGGTGATAT
+>gi|300871993|gb|GU983910.2|_Drosophila_C_virus_isolate_Ez10_polyprotein_gene,_partial_cds
+CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAATTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT
+TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTCCCTGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT
+TTGGTGATAT
+>gi|300871991|gb|GU983909.2|_Drosophila_C_virus_isolate_RG7_polyprotein_gene,_partial_cds
+CGAGGCACCTGTAGTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAATTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT
+TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGAT
+>gi|300871989|gb|GU983908.2|_Drosophila_C_virus_isolate_RC18_polyprotein_gene,_partial_cds
+TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT
+TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAGATGTGTTTCTTTAAAAGAAGATATCATGAAGATTGTT
+TTAGTTATAT
+>gi|300871987|gb|GU983906.2|_Drosophila_C_virus_isolate_Kn134_polyprotein_gene,_partial_cds
+TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT
+TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGGATACTTTTCCCGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATAC
+>gi|300871985|gb|GU983905.2|_Drosophila_C_virus_isolate_KN5_polyprotein_gene,_partial_cds
+TGAGGCACCTGTGTTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT
+TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA
+TATCCAAGCCACTGTGATTGATACAACAGACAGATGTGTTTCTTTAAAAGAAGATATCATGAAGATTGTT
+TTAGTTATAT
+>gi|300871983|gb|GU983902.2|_Drosophila_C_virus_isolate_Hi99.18_polyprotein_gene,_partial_cds
+CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGGGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT
+TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTCCCCGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT
+TTGGTGATAT
+>gi|300871981|gb|GU983901.2|_Drosophila_C_virus_isolate_FSP_polyprotein_gene,_partial_cds
+CATTATGACTCAAGGCATCGGTAAGAAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTA
+TCAGGAGATGTTGAGACTAACCCTGGACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTA
+GATATACACGGTTGGAAAAAGCTATTGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCG
+TCGACAAATCAAAAATCGGAAAATTTATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGAT
+GGGATAAAAGATGGTGTTGGCTCTGAACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGA
+ATACTCTTCCCGGGTTACAAGCAAATATTCAAGCCACTGTGATTGATACAACAGACAGATGTGTTTCTTT
+AAAAGAAGATATCATGAA
+>gi|300871979|gb|GU983900.2|_Drosophila_C_virus_isolate_AL7_polyprotein_gene,_partial_cds
+TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGATTATACCATTATGACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCGCGTCAAATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACATTAATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT
+TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAAATATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT
+TTAGTGATAT
+>gi|300871977|gb|GU983899.2|_Drosophila_C_virus_isolate_AkGB1_polyprotein_gene,_partial_cds
+CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAATTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT
+TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT
+TTGGTGATAT
+>gi|300871975|gb|GU983898.2|_Drosophila_C_virus_isolate_vir_polyprotein_gene,_partial_cds
+CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAATTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT
+TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTCCCTGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT
+TTGGTGATAT
+>gi|300871973|gb|GU983897.2|_Drosophila_C_virus_isolate_G96.232_polyprotein_gene,_partial_cds
+TTCAAGGCATCGGTAAGAAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGA
+TGTTGAAACTAACCCTGGACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACA
+CGGTTGGAAAAAGCTATTGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAA
+TCAAAAATAGGAAAATTTATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAA
+AGATGGTGTTGGCTCTGAACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTC
+CCTGGGTTACAAGCAAATATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAG
+ATATTATGAAG
+>gi|300871971|gb|GU983896.2|_Drosophila_C_virus_isolate_G96.45_polyprotein_gene,_partial_cds
+CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAATTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT
+TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTCCCTGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT
+TT
+>gi|300871969|gb|GU983894.2|_Drosophila_C_virus_isolate_16a10_polyprotein_gene,_partial_cds
+CTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTGGACCCGTTCAATCGCGCCC
+CGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTATTGAACGTCGAGACGATAAA
+ATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATTTATTCCCAAGGGATGTTTG
+ATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTGAACAGATGAATGGAAATTT
+GACTCGTATTTGTGATTTCCTAGAGAACACTCTCCCCGGGTTACAAGCAAATATTCAAGCCACTGTGATT
+GATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTTTTGGTGATAT
+>gi|300871967|gb|GU983893.2|_Drosophila_C_virus_isolate_Tana11_polyprotein_gene,_partial_cds
+TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT
+TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAGATGTGTTTCTTTAAAAGAAGATATCATGAAGATTGTT
+TTAGTTATAT
+>gi|300871965|gb|GU983892.2|_Drosophila_C_virus_isolate_psjmg_polyprotein_gene,_partial_cds
+TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCGCGTCAAATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACATTAATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT
+TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAAATATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT
+TTAGTGATAT
+>gi|300871963|gb|GU983891.2|_Drosophila_C_virus_isolate_PS94_polyprotein_gene,_partial_cds
+TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT
+TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAGATGTGTTTCTTTAAAAGAAGATATCATGAAGATTGTT
+TTAGTTATAT
+>gi|300871961|gb|GU983890.2|_Drosophila_C_virus_isolate_Baf153_polyprotein_gene,_partial_cds
+TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT
+TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAGATGTGTTTCTTTAAAAGAAGATATCATGAAGATTGTT
+TTAGTTATAT
+>gi|300871959|gb|GU983889.2|_Drosophila_C_virus_isolate_Bam73_L_polyprotein_gene,_partial_cds
+TGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT
+TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA
+TATCCAAGCCACTGTGATTGATACAACAGACAGGTGTGTTTCTTTAAAAGAAGATATCATGAAGATTGTT
+TTAGTGATAT
+>gi|300871957|gb|GU983888.2|_Drosophila_C_virus_isolate_Bam73_H_polyprotein_gene,_partial_cds
+TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCGCGTCAAATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACATTAATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT
+TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAAATATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT
+TTAGTGATAT
+>gi|300871955|gb|GU983885.2|_Drosophila_C_virus_isolate_16a9_polyprotein_gene,_partial_cds
+GTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATGACTCAAGGCATCGGTAAGAAGAATCCAA
+AACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTGGACCCGTTCA
+ATCGCACCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTATTGAACGTCGA
+GACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATTAAAAATAGGAAAATTTACTCCCAAG
+GAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTGAACAGATGAA
+TGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAATATTCAAGCC
+ACTGTGATTGATACAACAGACAAATATGTTTCTTTAAAAGAAGATATTATGAAGATTGTTTTAGTGATAT
+>gi|300871953|gb|GU983884.2|_Drosophila_C_virus_isolate_Tam15_polyprotein_gene,_partial_cds
+CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG
+AAGAATCCAAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG
+GACCCGTTCAATCGCACCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATTAAAAATAGGAAAATT
+TACTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAAATATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT
+TTAGTGATAT
+>gi|300871951|gb|GU983883.2|_Drosophila_C_virus_isolate_Tam11_polyprotein_gene,_partial_cds
+TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT
+TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA
+TATCCAAGCCACTGTGATTGATACAACAGACAGATGTGTTTCTTTAAAAGAAGATATCATGAAGATTGTT
+TTAGTTATAT
+>gi|300871949|gb|GU983882.2|_Drosophila_C_virus_isolate_ZW141_polyprotein_gene,_partial_cds
+TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT
+TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA
+TATCCAAGCCACTGTGATTGATACAACAGACAGATGTGTTTCTTTAAAAGAAGATATCATGAAGATTGTT
+TTAGTTATAT
+>gi|300871947|gb|GU983881.2|_Drosophila_C_virus_isolate_HB2_polyprotein_gene,_partial_cds
+CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAATTGCGTCGACAAATCAAAAATAGGAAAATT
+TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTCCCCGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT
+TTGGTGATAT
+>gi|300871945|gb|GU983880.2|_Drosophila_C_virus_isolate_Safr10_polyprotein_gene,_partial_cds
+CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT
+TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAA
+TATTCAAGCCACTGTGAT
+>gi|300871943|gb|GU983879.2|_Drosophila_C_virus_isolate_mel15_L_polyprotein_gene,_partial_cds
+TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT
+TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAAGTGTGTTTCTTTAAAAGAAGATATTATGAAGATTGTT
+TTAGTGATAT
+>gi|300871941|gb|GU983878.2|_Drosophila_C_virus_isolate_mel15_H_polyprotein_gene,_partial_cds
+TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGATTATACCATTATGACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCGCGTCAAATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACATTAATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT
+TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAAATATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT
+TTAGTGATAT
+>gi|300871939|gb|GU983877.2|_Drosophila_C_virus_isolate_Linz44_polyprotein_gene,_partial_cds
+CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAACTCAAGGCATCGGTAAG
+AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT
+TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAATTGCGTCGACAAATCAAAAATAGGAAAATT
+TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTCCCCGGGTTACAAGCAAA
+TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT
+TTGGTGATAT
+>gi|2388672|gb|AF014388.1|_Drosophila_C_virus_strain_EB,_complete_genome
+TTTATATCGTGTGTACATATAAATATGTACACACGGCTTTTAGGTAGAATATTGTTTTCAATGTTGATTT
+TAAAGGTAACTTTGGTTATTATGCTTTACGGTTTTCATTGTTGATGGTATTTGTGGCCTGCGGTCCCTAA
+TTGTTGAATTATTTATTCTGATACGTTGTTTTCATTGTTGATGGTAAGGATTCTTATTTTGAAGTGGTTT
+TTCAGAAGATAACTCTAAATATGAATTATGCCTTATTGTTTTCAATGTTGATGGCCTTCGTTTAAATACT
+CTTTGTTAATGACGGTAATCAAAGATTACATCTCAAACTTAGATTAATATTTTTAAGTAGGGTATACTGA
+GTTAGTCCTCTCTCTTTACTGATTTTGATATCTGGTAATTGACTTCGAAGAAAGATGCGTCTTTTGGATT
+TGTAATGACTGGGCCTTAAGTTCATAGGTGTTATTACATGGAGGAACACATTACTTTGGTTGATGATGAT
+GTTTTGATGATGACTTTCAATGTATGTGCTTATGTTAAGCCTGACATAAGAACTTACTAGTTTGCATAAT
+GCAAAGGGTTAGTATATGATTTTTAGTATGTGGATTTTGACACTGCCTTTGATTAGGATGTGTGAATGAT
+TTTGAAACATATTAAGATGTTTATACGAGCGTGTTGTTTACTATTTTCAGGATATGTGGAAGCGGTTGTG
+TATGATCTATACGCACATTTAGTTCCCAGAGGGCGTTGTCGTCTCCCCCTAAGCAAGGGAGAAACACGTG
+GCACATGATCTTGCGCTTAACGATAAAAATGGAATCTGATAAAAGTATGGCCTGTTTAAATAGAATTTTG
+ATGAATAAGATGATGTTTGTGGAAGATAAGATCTCTACCCTTAAGATGGTTGCTGATTATTATCAAAAAG
+AAGTAAAGTATGATTTTGATGCAGTTGAATCTCCCCGTGAGGCACCTGTATTTAGATGTACTTGTCGATT
+CCTTGGTTATACCATTATGACTCAAGGCATCGGTAAGAAGAATCCGAAACAGGAAGCTGCACGTCAGATG
+TTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTGGACCCGTTCAATCGCGCCCCGTGTATTATCGTT
+ACAACGACCCTAGATATACACGGTTGGAAAAAGCTATTGAACGTCGAGACGATAAAATTAAAACATTAAT
+TAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATTTATTCCCAAGGAATGTTTGATAAATTAACTAAA
+CAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTGAACAGATGAATGGAAATTTGACTCGTATTTGTG
+ATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAATATTCAAGCCACTGTGATTGATACAACAGACAA
+ATATGTTTCTTTAAAAGAGGATATTATGAAGATTGTTTTAGTGATATTGCTTGTTCGTCTTTTAATGGTT
+TGGAAGAAGTATCGTGCTTCTCTGTGTGTTATTTTAATCTTTATTTTTAAATTTTATGGATTCGATCAAA
+AGTTGATTGATTTAATTATGGATTTGAAGAATAAAATATTTTCACAGGGTGCATTGGAAGATACAGTTGA
+GGAGGTTGTATATCATCCTTGGTTCCATACGTGTGGAAAAATCATCTTTGCGGTTATGGCTTTCTTAACA
+ATTAAGAAAATTCCTGGTAAACAGGATTGGGATAGTTACATAACACGTTTAGATCGTATCCCAAAATCTA
+TTGAGGGAGCTAAAAAGATCACTGATTACTGTTCAGAATATTTTAATATTGCTAATGATCAGATCAAGAT
+GATGGTTCTTGGAAAGACTAAAGAAGAATTGCAACGTGCTAATGGACTATATGGAGAAATTCAAGCTTGG
+GCTCAAGAGGTTCGCCAGTATTTGGAATTGGATCAACGGAATAAAATTGATCTAGATACTGAAACCGCAA
+ATCGTGTTGAACAACTTTGGATAAAGGGCTTGAAATTCAAGAGTGAACCCCTTTTGAGTAAGGAAATGTC
+AGCTTTAGTTCATACAACTCTTTTACCAGCTAAGCAATTGTACGAGTATGTATCGTGTTCTCCTGTTAAA
+GGGGGAGGACCACGTATGCGTCCAATTTGTTTATGGTTGGTAGGTGAATCAGGAGTTGGTAAGACTGAAA
+TGGTATATCCATTGTGCATTGATGTTCTTCGGGAAATGGGGATGATTAAGAAAGATGATTTTCATCATCA
+AGTTTATGGTCGTCAAGTTGAAACTGAATTCTGGGATGGTTATAAAGGACAGAAAATTGTCATTTATGAT
+GATGCATTTCAGAAGAAAGATGACAAAACAGCAGCTAACCCAGAAATTTTTGAGGTTATTCGCTCTTGCA
+ACACTTTTCCTCAGCATTTACATATGGCAGCTCTTCATGATAAAAATACTTTTTCTGCTGCTGAATTACT
+CTTATATACCACTAATGATTATAATGTTAAGCTGGAATCTATTACTTTTCCCGATGCTTTCTTTAATCGT
+ATGGGCGATATGGCTTATAAAGTTAGTCCTAAGAAAGAGTATGGTATTGAAACCGAGAAAGGGAATTCAG
+GTAAAACTTATTTAAAATTGGATAAGAGTAAATTGGACAAAACAAAAGCTATTGACCTTTCAGTGTATGA
+ATTCCAAAAAATTGTACGTGACGAGAAAAGTGATGCAGGTTGGATTGATTCTGGATCACCCTTGGACTAT
+GAAGATTTTGCTAAATTAGTGTGTTCAAAATGGAAAGAAGCGAAACAATCTTCAATGAATAAATTGAAAT
+TTTTGGAAGAATATGCTATTCGTGCTCAGGTTGGATCAGAAGAAAATTCTGAATATGGTGATTGTATAGA
+TTTTGTCGATGATATTGCCAAACGCTTACAAAAAGGTGAAACTCTTGAAGAAATAGAGTTTGATTATGCC
+TCAGATCCAGAGATGTTTACTCAATACTATCATTTTAAATCTACAATTAAACCGGCATCGCGTTGGCAGA
+AGTATAAGGATCGGATGGACATTTGTTTGAGCGACTGTAAGACTTATTTAGCAAAGAAATACGAAGAAAT
+TAAGAAAATTCTTGCCGAACATCCTATCTTGACGATTTTAGGAATGATAGGGGTTGCCTTATCTGCTCTG
+GCAATGTACTATTGGTTTTCTAAATCGTTGGATCCTGTAGAAGCCGAGGTTGCTCCTTCTGGTGACGCTA
+AAACAGTGCGCTTACCAAGGAAACTCGTTGAGATTGGTGCTTCTGGAGATGTTAAAACACAGAAGATTGT
+GAAACCCGTTGTAGAGACCGAATGGCATCGTAACAATAAAGGAGAGATTGAAATTTCTTGTGATGAATGT
+GGTATGCATAGGATGTCTGCATTTAACAATATGACAGATGAAGAATTTGATAACTGTACATATGAAGATT
+TGAATAAGGACCAGAAACGTGAACTTGCCCAGTGGTCTACTAAAGATTCTTGGTTAGGTCGATTCTTTTT
+GAGTCGAGATCGCAAGAATAAGGTTGGAATTTGGGCCGAAGTGGGACAATCAGGTGATGTTAAAACAAAT
+AAAGCTCAGATTAAACGTGTTGAAGCTGGAGCCGAAGAATTAGTTACTGTTGCTTTAACTCAAGGTTGTT
+CTGATGATGCTGCACACAATTTGATGATTGACGTTTTCCAAAAAAATACATATAGAATGTCATACTTCCG
+TGGAGACAAGCGTTATCAACTTGGAAATTGTACATTTGTTCGTGGTTGGTCTTTTATTATGCCATATCAT
+TTTGTACAGGCTGTGTTTGCGCGAAGATTGCCACCTAACACGATTATTTCTTTGTCCCAACAGATGTCTG
+AAGATTTAATGCAAATTCCATTATCACACTTCTTTTCTGCTGGTGTTGACAATTTTTATTTAACAGATAA
+TTGTGTGCGCTTGCCATTTAAAAATGGAGATTTTCGCGATTGTGTTATGGTTAATTTACATTCACGAATG
+TGCACGCCACATCGTGATTTGGTTCGGCATTTTATTTTAACTTCTGATCAAGGTAAATTGAAGGGATCTT
+TTAGTGGTGCAATGGCAACTTTCCATGTTAACAATATGGGTTTATATCGTGTTTATAATTGGCTAAATGC
+AGTTCGTCCTTGCGATAAAAAGATAGAAATTTTCCACCCTGAAGATGGTTTTGAGTATCCCGAAGAATCA
+TATATTCAACGTGACTGTTATGAATATAATGCACCGACTCGTACTGGGGATTGTGGATCTATTATTGGAT
+TGTATAATAAATATTTAGAAAGGAAAATCATCGGTATGCACATTGCTGGAAATGATGCAGAAGAGCATGG
+TTATGCGTGCCCTTTGACACAAGAGTGTCTTGAGACTGCTTTTTCTGCTTTAGTAAATAAAAATAAGAAG
+AATATTTCCTCACAATTTTATTATGAAATACCCAATATGGTTGATCCACTCGGTGATAGTAGTGTTCCTG
+AAGGTAAGTTTTACGCTTTAGGAAAGTCATCTATTCGTGTGGGACAGGCAGTTAATTCGTCCATAATTCC
+TTCTCGAATTTATGGGAAGTTGTCTGTTCCTACAATGAAACCAGCACTACTCAAGCCAACGATTCTGAAT
+AACAAAGTACATAATCCTTTATTGTCGGGACTTAAGAAATGTGGTGTAGACACTGCGGTCTTGAGTGATG
+ATGAAGTTTTGAGTGCTTCACAAGATGTTTGTCGTGTTATGTTGAACCAATATAATAAAAATTTGAATAA
+AACAAAGTATCAACGCATTTTAACATATGAGGAAGCTATTCGTGGAACCCAAGATGATGAATTTATGTGT
+GCTATTAATCGTACAACGTCACCAGGATTTCCTTATGCACAAATGAAAAGAAATGCTCCAGGTAAACAGC
+AATGGATGGGTTTTGGTGAAGAATTTGATTTTACAAGTAATTATGCACTAGCTTTGCGGAAAGATGTTGA
+ACAACTCATTGAAGATTGTGCTAGCGGAAAAATATCTAATGTCATTTTTGTAGATACATTGAAAGATGAG
+CGACGCGATATAGCTAAAGTAAATGTAGGTAAAACGCGTGTATTTTCTGCAGGTCCTCAACATTTTGTAG
+TTGCATTTCGCCAATATTTCCTACCTTTTGCTGCTTGGTTGATGCATAATCGCATTTCGAACGAAGTAGC
+AGTTGGCACTAATGTTTATTCATCTGATTGGGAACGTATTGCAAAACGTCTTAAAACAAAAGGTAGTCAC
+GTCATTGCGGGGGACTTTGGAAATTTCGATGGATCTTTAGTAGCACAAATTTTGTGGGCCATATTTTGGG
+AAATTTTTGTTGTATGGCTTAAGCAATTTATTGATATAGAGAATTCAGAAGGAAAACGTATTTTATGTAT
+CTGTCTTGGTTTGTGGTCACATTTAGTTCACTCTGTTCATATTTATGAAGATAATGTATATATGTGGACT
+CATTCTCAACCTTCTGGCAATCCTTTCACTGTTATTATTAATTGCTTGTATAATTCGATTATTATGCGAC
+TGTCATGGATTCGTGTGATGGAGAAATTTCAACCTAGACTTAAGTCCATGAAGTGGTTCAACGAATATGT
+CGCCTTGATAACATATGGTGACGACAATGTTTTAAACATTGATGCAAAGGTTGTGGAATGGTTTAATCAG
+ATTAACATTAGTGAGGTTATGACTGAAATGCGACATGAATATACGGACGAAGCTAAAACTGGTGATATTG
+TTAAATCTCGTAAATTAGAAGATATTTTCTTTTTGAAGAGAAAATTTCGTTTTAGCCCAGAATTACAACG
+CCATGTTGCTCCATTGAAGATCGAAGTTATTTATGAAATGTTGAATTGGTCTCGCCGCTCTATAGATCCA
+GATGAAATCTTGATGTCGAACATTGAAACGGCTTTTCGTGAAGTAGTTTACCACGGAAAAGAAGAATACG
+ATAAACTAAGGTCAGCGGTATTGGCGTTGAAGGTACCCCAGGAACTTCCTGAAAACCCTCAGATTTTGAC
+GTACAACCAATATTTGCACGATATTGAATATCTTGCGGACCCTTTGTACGACTTTTAGTTAAGATGTGAT
+CTTGCTTCCTTATACAATTTTGAGAGGTTAATAAGAAGGAAGTAGTGCTATCTTAATAATTAGGTTAACT
+ATTTAGTTTTACTGTTCAGGATGCCTATTGGCAGCCCCATAATATCCAGGACACCCTCTCTGCTTCTTAT
+ATGATTAGGTTGTCATTTAGAATAAGAAAATAACCTGCTAACTTTCAAACAAATAATAATAACATTGAAA
+ATGAAGATCGGAAAATTACTTCCGAGCAAAAAGAGATTGTACACTTTTCTAGTGAAGGAGTTACCCCTAG
+TACCACTGCGGTGCCTGATATCGTTAGTCTTTCAACAGATTATTTGTCTATGACTACTCGTGAAGATCGT
+ATCCACACGATTAAAGATTTTCTTTCTCGTCCAATTATAATTCAAACTGGTCTTTGGTCTTCCGCTACAA
+CTGCCGAAACTCAATTGTATACTGCTAATTTCCCTGAAGTGTTCATTTCTAATACTATGTATCAAGAAAA
+GTTGCGTGGGTTCGTGGGTTTGCGAGCAACTTTAGTCATTAAAGTGCAAGTGAATTCCCAACCTTTCCAG
+CAAGGACGATTGATGCTACAGTATTATCCGTATGCACAGTATATGCCTAACCGTGTTTCTTTGGTGAATT
+CCACTCTCCAAGGACGCTCTGGTTGTCCTCGAACAGATTTGGATTTGAGCGTTGGTACGGAAGTTGAAAT
+GCGAATTCCTTATGTGTCCCCTCATGTATATTACAATCTTATTACTGGACAAGGATCATTTGGCGCTATA
+TATTTGGTTGTATATAGCCAACTAAGAGATCAAGTTACAGGAACAGGTTCTGTTGAATATACTGTTTGGG
+CTCATTTGGAAGATGTAGATGTGCAATACCCGACCGGTGCAAACATTTTCACGGGTAGCTCTCCAAATTT
+TGCCTCTTTGGGTCAGAAAATGAGTGATGGAAAATTCACTGAAAAAGACTTGAGAGATATTTGGACTTCA
+AAAGCGTACAATAAACAACCAGACAAAATTTTCGCACAAGTGGCTTCTGAAATAACACAACTCAAAGAAT
+CAGGAACAATTAGTTCTGGAATTGGACAAGTTTCTGAAGGTCTTTCTACCATGTCTAAAATCCCTATACT
+CGGAAATATGTTTACAAAACCCGCCTGGATTTCAGCTCAAGTATCTAATATCTTCAAGATGCTTGGTTTT
+TCAAAACCCACTGTTCAAGGTCTTCCTTGTGAATCGAAACTGCGTGGTCAAGTTCGAATGGCGAATTTTG
+ATGGCGCTGATACATCACATAAATTGGCTTTGTCTGCCCAAAACGAAATTGAAACAAAATCTGGACTTTC
+TGGAACTTCTCCTGATGAAATGGATTTATCACACGTCCTTTCCATACCAAATTTTTGGGATCGTTTTACT
+TGGAACACAACCGATGCCACTAGTTCTATTTTATGGGATAATTATGTTACACCAATGAAAATTAAACCAT
+ATTCCTCTACAATATTAGATAGATTTAGATGCACTCATATGGGTTTTGTAGCCAACACACACGGTTATTG
+GTGTGGATCAATAGTTTATACTTTTAAATTTGTTAAGACTCAATTTCATTCTGGACGTTTACGCATTAGT
+TTTATTCCATTTTATTATAATACGACTATATCTGCAGGAGTTCCCGATGTTTCTCGTACCCAAAAAGTAA
+TCGTTGATCTGCGCACCTCTACAGAAGTCTCTTTCACTATTCCGTATGTGTCTTCACGACCTTGGATGTA
+CTGTATTCGTCCTGAAGCTTCGTGGCTTGGAACCGATAATGCTTTGATGTACAACGCCGTTACGGGTATA
+GTGAGAGTTGAGGTTCTTAACCAGTTGGTTGCCGCTAACAACGTGTTTCAATCTATAGACACTATTGTTG
+AAGTTAGTGGTGGTCCTGATTTAACTTTTGCAGCACCAATGGCTCCCTCTTATGTTCCTTATTCTGGAGG
+TTTTACTTTAGCAGATGATGCGGCAGCAAAGAAACAGCGTGAGGAGGAGTATGACAACAACATACCTCAA
+ACTATTTCTAATCGTGGAAAACGTGAGGTTGAAGATGCTCGTATTGTTGCGCAAGTAATGGGTGAAGATT
+TAGCTATTCAAAGAAACGATGCTCAACATGGTGTTCATCCAATGACTATAGACACTCATAAGATCGACTC
+AAATTGGTCTCCGGAAGCGCATTGTATTGGTGAAAAGATTATGTCTATTCGCCAATTGATTAAGCGTTTT
+GGCATGGCTTTGAACTCCTTGAATTTGATAAGTGATGCACCAAACACCTTGATAGCACCATTTTCAGTTC
+AGCACCCAACTCCTGTTGTTGCCCCTGCTGAACCCATGTCCCTTTTTGAATATTATTATTTCATTTATGG
+ATTTTGGAGAGGTGGCATGAGATTTAAACTTCAGGCAGTACGTACAAACTCAGCAGAAACATCAGTTAAA
+ACCGACACAACTTGGACTGTAAATTTGTGGAATTCTGTACAAGATTCTTTTAATTCTCTAATTAATGTAT
+TTAGTACTACTGATTACCCTATAAAATCCACAGGAGCACTTCCAGCCGGAACAAGCGGTTTTGGCAATTC
+GATGACGTATATAGATCCTGAGGTTGAAGGTTTTATGGAATTTGAGATTCCATATTATAATATCTCCCAT
+ATTTCTCCAGCTACAACCTATGTTCGTGGTACTGAATCTCCTATTACAATTAATAGTGTCTTGCGTGGAC
+ATTTGCCACCACAAATTGTGGCTGTTGCACCACAGGGCACTATTGCCACTACAGATGTAGTGAACGCTCA
+ATTTGCTCGTGCTCCTTCTGACGACTTTTCATTTATGTATCTCGTTGGTGTTCCACCACTTACCAACGTC
+GCTCGTCCCTAACTCCCTTACTATTCTGGATCCTTTAAAATTTATTAGGATAGACAAAAATTAACTCTAT
+ATTAGATAGTATTAGATTAAGTTTCTTTTTGGTTTTGGGTTTTATTCAGTAACTATCTGCCCTGCTTACA
+CGGGTATTATTTTTAATTCTTGTCCCTTCTGGACTCTTTTATTTTGTATTTTCAAAATTTTTACTAATTT
+TTAGTCAGAGTCCTTAGGGGCTACCAGGTTTTTCGCAATTTTCCTGCTTACTGACAGTAATTGCAATTTC
+GAATTAAAATAATAGTTGTTTTCT
+>gi|236164939|emb|GN351241.1|_Sequence_1005_from_Patent_WO2007130519
+ATCATAAATGACAATTTTCTGTCCTTTATAACCATCCCAGAATTCAGTTTCAACTTGACG
+>gi|236164937|emb|GN351240.1|_Sequence_1004_from_Patent_WO2007130519
+CGTCAAGTTGAAACTGAATTCTGGGATGGTTATAAAGGACAGAAAATTGTCATTTATGAT
+>gi|236164934|emb|GN351239.1|_Sequence_1003_from_Patent_WO2007130519
+CCAATCCTGTTTACCAGGAATTTTCTTAATTGTTAAGAAAGCCATAACCGCAAAGATGAT
+>gi|236164931|emb|GN351238.1|_Sequence_1002_from_Patent_WO2007130519
+ATCATCTTTGCGGTTATGGCTTTCTTAACAATTAAGAAAATTCCTGGTAAACAGGATTGG
+>gi|236164929|emb|GN351237.1|_Sequence_1001_from_Patent_WO2007130519
+TGCAATTACTGTCAGTAAGCAGGAAAATTGCGAAAAACCTGGTAGCCCCTAAGGACTCTG
+>gi|236164927|emb|GN351236.1|_Sequence_1000_from_Patent_WO2007130519
+CAGAGTCCTTAGGGGCTACCAGGTTTTTCGCAATTTTCCTGCTTACTGACAGTAATTGCA
+>gi|236164925|emb|GN351235.1|_Sequence_999_from_Patent_WO2007130519
+AAATTTTGAAAATACAAAATAAAAGAGTCCAGAAGGGACAAGAATTAAAAATAATACCCG
+>gi|236164923|emb|GN351234.1|_Sequence_998_from_Patent_WO2007130519
+CGGGTATTATTTTTAATTCTTGTCCCTTCTGGACTCTTTTATTTTGTATTTTCAAAATTT
+>gi|236164921|emb|GN351233.1|_Sequence_997_from_Patent_WO2007130519
+TCCAGAATGAAATTGAGTCTTAACAAATTTAAAAGTATAAACTATTGATCCACACCAATA
+>gi|236164917|emb|GN351232.1|_Sequence_996_from_Patent_WO2007130519
+TATTGGTGTGGATCAATAGTTTATACTTTTAAATTTGTTAAGACTCAATTTCATTCTGGA
+>gi|236164915|emb|GN351231.1|_Sequence_995_from_Patent_WO2007130519
+GGCACCGCAGTGGTACTAGGGGTAACTCCTTCACTAGAAAAGTGTACAATCTCTTTTTGC
+>gi|236164913|emb|GN351230.1|_Sequence_994_from_Patent_WO2007130519
+GCAAAAAGAGATTGTACACTTTTCTAGTGAAGGAGTTACCCCTAGTACCACTGCGGTGCC
+>gi|236164910|emb|GN351229.1|_Sequence_993_from_Patent_WO2007130519
+CCTGGATATTATGGGGCTGCCAATAGGCATCCTGAACAGTAAAACTAAATAGTTAACCTA
+>gi|236164908|emb|GN351228.1|_Sequence_992_from_Patent_WO2007130519
+TAGGTTAACTATTTAGTTTTACTGTTCAGGATGCCTATTGGCAGCCCCATAATATCCAGG
+>gi|236164905|emb|GN351227.1|_Sequence_991_from_Patent_WO2007130519
+AGTTAACCTAATTATTAAGATAGCACTACTTCCTTCTTATTAACCTCTCAAAATTGTATA
+>gi|236164901|emb|GN351226.1|_Sequence_990_from_Patent_WO2007130519
+TATACAATTTTGAGAGGTTAATAAGAAGGAAGTAGTGCTATCTTAATAATTAGGTTAACT
+>gi|236164898|emb|GN351225.1|_Sequence_989_from_Patent_WO2007130519
+ATAGCACACATAAATTCATCATCTTGGGTTCCACGAATAGCTTCCTCATATGTTAAAATG
+>gi|236164896|emb|GN351224.1|_Sequence_988_from_Patent_WO2007130519
+CATTTTAACATATGAGGAAGCTATTCGTGGAACCCAAGATGATGAATTTATGTGTGCTAT
+>gi|236164894|emb|GN351223.1|_Sequence_987_from_Patent_WO2007130519
+GTGTCAAAGGGCACGCATAACCATGCTCTTCTGCATCATTTCCAGCAATGTGCATACCGA
+>gi|236164892|emb|GN351222.1|_Sequence_986_from_Patent_WO2007130519
+TCGGTATGCACATTGCTGGAAATGATGCAGAAGAGCATGGTTATGCGTGCCCTTTGACAC
+>gi|92142564|dbj|BD294721.1|_Novel_tertiary_structure_having_ability_to_accelerate_translation_activ
+GTTAAGATGTGATCTTGCTTCCTTATACAATTTTGAGAGGTTAATAAGAAGGAAGTAGTGCTATCTTAAT
+AATTAGGTTAACTATTTAGTTTTACTGTTCAGGATGCCTATTGGCAGCCCCATAATATCCAGGACACCCT
+CTCTGCTTCTTATATGATTAGGTTGTCATTTAGAATAAGAAAATAACCT
+>gi|30014277|dbj|BD177017.1|_Novel_translational_activity-promoting_higher-order_structure
+GTTAAGATGTGATCTTGCTTCCTTATACAATTTTGAGAGGTTAATAAGAAGGAAGTAGTGCTATCTTAAT
+AATTAGGTTAACTATTTAGTTTTACTGTTCAGGATGCCTATTGGCAGCCCCATAATATCCAGGACACCCT
+CTCTGCTTCTTATATGATTAGGTTGTCATTTAGAATAAGAAAATAACCT