Mercurial > repos > drosofff > fetch_fasta_from_ncbi
changeset 0:4b34f2b5c14e draft
Uploaded
author | drosofff |
---|---|
date | Mon, 13 Apr 2015 18:17:08 -0400 |
parents | |
children | c1d17d173128 |
files | retrieve_fasta_from_NCBI.py retrieve_fasta_from_NCBI.xml test-data/output.fa |
diffstat | 3 files changed, 885 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Retrieve the sequences matching an Entrez query from NCBI.
It returns a multiFASTA nuc/prot file

Entrez Database  UID common name  E-utility Database Name
Nucleotide       GI number        nuccore
Protein          GI number        protein

Retrieve strategy:

esearch to get total number of UIDs (count)
esearch to get UIDs in batches
loop until end of UIDs list:
  epost to put a batch of UIDs in the history server
  efetch to retrieve info from previous post

retmax of efetch is 1/10 of declared value from NCBI

queries are 1 sec delayed, to satisfy NCBI guidelines (more than what they request)


python get_fasta_from_taxon.py -i 1638 -o test.out -d protein
python get_fasta_from_taxon.py -i 327045 -o test.out -d nuccore # 556468 UIDs
"""

import logging
import optparse
import re
import time
from contextlib import closing

try:  # Python 2
    from urllib import urlencode
    from urllib2 import Request, urlopen
except ImportError:  # Python 3
    from urllib.parse import urlencode
    from urllib.request import Request, urlopen


def _to_text(data):
    """Return *data* as text, decoding it when it is a Python 3 bytes object."""
    # On Python 2 ``bytes is str``, so the payload is returned untouched.
    if isinstance(data, bytes) and not isinstance(data, str):
        return data.decode('utf-8', 'replace')
    return data


def _between(line, tag):
    """Return the text enclosed by ``<tag>`` and ``</tag>`` in *line*."""
    opening = '<%s>' % tag
    start = line.find(opening) + len(opening)
    return line[start:line.find('</%s>' % tag)]


class Eutils(object):
    """Download the FASTA records matching an Entrez query through E-utilities."""

    def __init__(self, options, logger):
        """
        Store the query settings.

        options -- object with ``query_string``, ``dbname`` and ``outname``
                   attributes (the parsed command line options)
        logger  -- ``logging.Logger`` used for progress and debug messages
        """
        self.logger = logger
        # NCBI only serves the E-utilities over HTTPS.
        self.base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        self.query_string = options.query_string
        self.dbname = options.dbname
        if options.outname:
            self.outname = options.outname
        else:
            self.outname = 'NCBI_download' + '.' + self.dbname + '.fasta'
        self.ids = []
        self.retmax_esearch = 100000
        self.retmax_efetch = 1000
        self.count = 0
        self.webenv = ""
        self.query_key = ""

    def retrieve(self):
        """Run the whole pipeline: count the UIDs, list them, download them."""
        self.get_count_value()
        self.get_uids_list()
        self.get_sequences()

    def _post(self, url, values, log_data=True):
        """POST *values* to *url* and return the response body as text."""
        data = urlencode(values)
        if log_data:
            self.logger.debug("data: %s" % str(data))
        # urlencode() output is plain ASCII; Python 3 needs it as bytes.
        request = Request(url, data.encode('ascii'))
        with closing(urlopen(request)) as response:
            return _to_text(response.read())

    def get_count_value(self):
        """
        just to retrieve Count (number of UIDs)
        Total number of UIDs from the retrieved set to be shown in the XML
        output (default=20). By default, ESearch only includes the first 20
        UIDs retrieved in the XML output. If usehistory is set to 'y',
        the remainder of the retrieved set will be stored on the History server;

        http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch

        The value is stored in ``self.count``; it stays 0 when the response
        holds no <Count> element.
        """
        self.logger.info("retrieving data from %s" % self.base)
        self.logger.info("for Query: %s and database: %s" %
                         (self.query_string, self.dbname))
        querylog = self.esearch(self.dbname, self.query_string, '', '', "count")
        self.logger.debug("Query response:")
        for line in querylog:
            self.logger.debug(line.rstrip())
            if '</Count>' in line:
                self.count = int(_between(line, 'Count'))
        self.logger.info("Founded %d UIDs" % self.count)

    def get_uids_list(self):
        """
        Increasing retmax allows more of the retrieved UIDs to be included in the XML output,
        up to a maximum of 100,000 records.
        from http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch

        The UIDs found are appended to ``self.ids``.
        """
        retmax = self.retmax_esearch
        # Ceiling division: an exact multiple of retmax must not trigger an
        # extra, empty request. At least one request is always sent.
        num_batches = max(1, -(-self.count // retmax))
        self.logger.info("Batch size for esearch action: %d UIDs" % retmax)
        self.logger.info("Number of batches for esearch action: %d " % num_batches)
        for n in range(num_batches):
            querylog = self.esearch(self.dbname, self.query_string, n * retmax, retmax, '')
            for line in querylog:
                if '<Id>' in line and '</Id>' in line:
                    self.ids.append(_between(line, 'Id'))
            self.logger.info("Retrieved %d UIDs" % len(self.ids))

    def esearch(self, db, term, retstart, retmax, rettype):
        """Query esearch.fcgi and return the XML response as a list of lines."""
        url = self.base + "esearch.fcgi"
        self.logger.debug("url: %s" % url)
        values = {'db': db,
                  'term': term,
                  'rettype': rettype,
                  'retstart': retstart,
                  'retmax': retmax}
        querylog = self._post(url, values).splitlines(True)
        time.sleep(1)  # stay well below the NCBI request rate limit
        return querylog

    def epost(self, db, ids):
        """
        Upload the comma separated UIDs *ids* to the history server.

        The query key and web environment found in the response are stored
        in ``self.query_key`` and ``self.webenv`` for the following efetch.
        """
        url = self.base + "epost.fcgi"
        self.logger.debug("url_epost: %s" % url)
        values = {'db': db,
                  'id': ids}
        # The posted data (up to a full batch of UIDs) is deliberately not logged.
        querylog = self._post(url, values, log_data=False).splitlines(True)
        self.logger.debug("query response:")
        for line in querylog:
            self.logger.debug(line.rstrip())
            if '</QueryKey>' in line:
                self.query_key = str(_between(line, 'QueryKey'))
            if '</WebEnv>' in line:
                self.webenv = str(_between(line, 'WebEnv'))
            self.logger.debug("*** epost action ***")
            self.logger.debug("query_key: %s" % self.query_key)
            self.logger.debug("webenv: %s" % self.webenv)
        time.sleep(1)  # stay well below the NCBI request rate limit

    def efetch(self, db, query_key, webenv):
        """
        Fetch, as FASTA text, the records previously posted under
        *query_key* / *webenv*, and return them once sanitised.

        Raises ValueError when the payload of a sequence database does not
        look like FASTA (NCBI error page, interrupted download...).
        """
        url = self.base + "efetch.fcgi"
        self.logger.debug("url_efetch: %s" % url)
        values = {'db': db,
                  'query_key': query_key,
                  'webenv': webenv,
                  'rettype': "fasta",
                  'retmode': "text"}
        fasta = self._post(url, values)
        if self.dbname != "pubmed":
            # An explicit check rather than an assert, which "python -O" drops.
            if not fasta.startswith(">"):
                raise ValueError("Unexpected efetch response: %s" % fasta)
            fasta = self.sanitiser(self.dbname, fasta)
        time.sleep(1)  # stay well below the NCBI request rate limit
        return fasta

    def sanitiser(self, db, fastaseq):
        """
        Clean the multiFASTA text *fastaseq* downloaded from database *db*.

        Records are expected to be separated by a blank line. For "nuccore",
        records whose first sequence line holds more than 40% of characters
        other than A, T, G, C are dropped; spaces of the header become "_"
        and the header is cut to 100 characters. For "protein", the header is
        cut to 100 characters, then spaces, "[", "]" and "=" become "_",
        trailing "_" are removed and any run of 49 or more amino acid letters
        is collapsed to "_". Records without sequence are dropped.
        Any other *db* value returns *fastaseq* unchanged.

        Returns the kept records joined by single newlines.
        """
        if db not in ("nuccore", "protein"):
            return fastaseq
        regex = re.compile(r"[ACDEFGHIKLMNPQRSTVWYBZ]{49,}")
        sane_seqlist = []
        for seq in fastaseq.split("\n\n"):
            # Ignore the empty chunk left by the trailing blank line instead
            # of blindly dropping the last chunk, which may be a real record.
            seq = seq.strip("\n")
            if not seq:
                continue
            fastalines = seq.split("\n")
            short_id = "|".join(fastalines[0].split("|")[:4])
            if len(fastalines) < 2 or not fastalines[1]:
                self.logger.info("Empty sequence for %s" % short_id)
                self.logger.info("%s download is skipped" % short_id)
                continue
            if db == "nuccore":
                first_line = fastalines[1]
                badnuc = sum(1 for nucleotide in first_line
                             if nucleotide not in "ATGC")
                ratio = float(badnuc) / len(first_line)
                if ratio > 0.4:
                    self.logger.info("%s ambiguous nucleotides in %s or download interrupted at this offset | %s" % (ratio, short_id, first_line))
                    self.logger.info("%s download is skipped" % short_id)
                    continue
                # remove spaces and trim the header to 100 chars
                fastalines[0] = fastalines[0].replace(" ", "_")[:100]
            else:  # protein
                header = fastalines[0][0:100]
                for char in " []=":
                    header = header.replace(char, "_")
                header = header.rstrip("_")  # because blast makedb doesn't like it
                fastalines[0] = regex.sub("_", header)
            sane_seqlist.append("\n".join(fastalines))
        return "\n".join(sane_seqlist)

    def get_sequences(self):
        """
        Total number of records from the input set to be retrieved, up to a maximum
        of 10,000. Optionally, for a large set the value of retstart can be iterated
        while holding retmax constant, thereby downloading the entire set in batches
        of size retmax.

        http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch

        The UIDs of ``self.ids`` are posted and fetched in batches of
        ``self.retmax_efetch`` and the sequences written to ``self.outname``.
        """
        batch_size = self.retmax_efetch
        uids_list = self.ids
        # Batches are cut from the UIDs actually retrieved, so that a Count
        # larger than the retrieved list never produces an empty epost.
        total = len(uids_list)
        self.logger.info("Batch size for efetch action: %d" % batch_size)
        self.logger.info("Number of batches for efetch action: %d" % (-(-total // batch_size)))
        with open(self.outname, 'w') as out:
            for start in range(0, total, batch_size):
                batch = uids_list[start:start + batch_size]
                self.epost(self.dbname, ",".join(batch))
                self.logger.info("retrieving batch %d" % ((start // batch_size) + 1))
                mfasta = self.efetch(self.dbname, self.query_key, self.webenv)
                if mfasta:  # every record of the batch may have been skipped
                    out.write(mfasta + '\n')


LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']


def __main__():
    """ main function: parse the command line, set up logging, run the download """
    parser = optparse.OptionParser(description='Retrieve data from NCBI')
    parser.add_option('-i', dest='query_string', help='NCBI Query String')
    parser.add_option('-o', dest='outname', help='output file name')
    parser.add_option('-l', '--logfile', help='log file (default=stderr)')
    parser.add_option('--loglevel', choices=LOG_LEVELS, default='INFO', help='logging level (default: INFO)')
    parser.add_option('-d', dest='dbname', help='database type')
    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error('Wrong number of arguments')
    # Fail with a usage message rather than a late TypeError.
    if not options.query_string:
        parser.error('A query string is required (-i)')
    if not options.dbname:
        parser.error('A database name is required (-d)')

    log_level = getattr(logging, options.loglevel)
    kwargs = {'format': LOG_FORMAT,
              'datefmt': LOG_DATEFMT,
              'level': log_level}
    if options.logfile:
        kwargs['filename'] = options.logfile
    logging.basicConfig(**kwargs)
    logger = logging.getLogger('data_from_NCBI')

    E = Eutils(options, logger)
    E.retrieve()


if __name__ == "__main__":
    __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/retrieve_fasta_from_NCBI.xml Mon Apr 13 18:17:08 2015 -0400 @@ -0,0 +1,57 @@ +<tool id="retrieve_fasta_from_NCBI" name="Retrieve FASTA from NCBI" version="0.9.2"> + <description></description> + <command interpreter="python">retrieve_fasta_from_NCBI.py -i "$queryString" -d $dbname -o $outfilename -l $logfile </command> + + <inputs> + <param name="queryString" type="text" size="5x80" area="True" value="txid10239[orgn] NOT txid131567[orgn] AND complete NOT partial[title] NOT phage[title]" label="Query to NCBI in entrez format" help="example: 'Drosophila melanogaster[Organism] AND Gcn5[Title]'"> + <sanitizer> + <valid initial="string.printable"> + <remove value="&quot;"/> + <remove value="\"/> + </valid> + <mapping initial="none"> + <add source="&quot;" target="\&quot;"/> + <add source="\" target="\\"/> + </mapping> + </sanitizer> + </param> + <param name="dbname" type="select" label="NCBI database"> + <option value="nuccore">Nucleotide</option> + <option value="protein">Protein</option> +<!-- <option value="pubmed">Pubmed (experimental)</option> --> + </param> + </inputs> + <outputs> + <data name="outfilename" format="fasta" label="${tool.name} on ${on_string}: queryString${queryString.value}.${dbname.value_label}.fasta" /> + <data format="txt" name="logfile" label="${tool.name} on ${on_string}: log"/> + </outputs> + <tests> + <test> + <param name="queryString" value="DCV AND virus" /> + <param name="dbname" value="nuccore" /> + <output name="outfilename" ftype="fasta" file="output.fa" /> + <!-- <output name="logfile" ftype="txt" file="log.txt" /> log.txt changes with timestamp. removed to pass the test --> + </test> + </tests> + <help> +**What it does** + +This tool retrieves nucleotide/peptide sequences from the corresponding NCBI database for a given entrez query. 
+ +The tool is preset with "txid10239[orgn] NOT txid131567[orgn] AND complete NOT partial[title] NOT phage[title]" for metaVisitor use purpose + +See `Entrez help`_ for explanation of query formats + +**Acknowledgments** + +This Galaxy tool has been adapted from the galaxy tool `get_fasta_from_taxon`_. + +It is Copyright © 2014-2015 `CNRS and University Pierre et Marie Curie`_ and is released under the `MIT license`_. + +.. _Entrez help: http://www.ncbi.nlm.nih.gov/books/NBK3837/#EntrezHelp.Entrez_Searching_Options +.. _get_fasta_from_taxon: https://toolshed.g2.bx.psu.edu/view/crs4/get_fasta_from_taxon +.. _CNRS and University Pierre et Marie Curie: http://www.ibps.upmc.fr/en +.. _MIT license: http://opensource.org/licenses/MIT + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.fa Mon Apr 13 18:17:08 2015 -0400 @@ -0,0 +1,577 @@ +>gi|92133599|dbj|BD295732.1|_WO_2003033719-A/3:_Novel_tertiary_structure_having_ability_to_accelerat +GTTAAGATGTGATCTTGCTTCCTTATACAATTTTGAGAGGTTAATAAGAAGGAAGTAGTGCTATCTTAAT +AATTAGGTTAACTATTTAGTTTTACTGTTCAGGATGCCTATTGGCAGCCCCATAATATCCAGGACACCCT +CTCTGCTTCTTATATGATTAGGTTGTCATTTAGAATAAGAAAATAACCT +>gi|28414844|dbj|BD173513.1|_WO_2002061080-A/3:_Novel_tertiary_structure_having_ability_to_accelerat +GTTAAGATGTGATCTTGCTTCCTTATACAATTTTGAGAGGTTAATAAGAAGGAAGTAGTGCTATCTTAAT +AATTAGGTTAACTATTTAGTTTTACTGTTCAGGATGCCTATTGGCAGCCCCATAATATCCAGGACACCCT +CTCTGCTTCTTATATGATTAGGTTGTCATTTAGAATAAGAAAATAACCT +>gi|9629650|ref|NC_001834.1|_Drosophila_C_virus,_complete_genome +TTTATATCGTGTGTACATATAAATATGTACACACGGCTTTTAGGTAGAATATTGTTTTCAATGTTGATTT +TAAAGGTAACTTTGGTTATTATGCTTTACGGTTTTCATTGTTGATGGTATTTGTGGCCTGCGGTCCCTAA +TTGTTGAATTATTTATTCTGATACGTTGTTTTCATTGTTGATGGTAAGGATTCTTATTTTGAAGTGGTTT +TTCAGAAGATAACTCTAAATATGAATTATGCCTTATTGTTTTCAATGTTGATGGCCTTCGTTTAAATACT +CTTTGTTAATGACGGTAATCAAAGATTACATCTCAAACTTAGATTAATATTTTTAAGTAGGGTATACTGA +GTTAGTCCTCTCTCTTTACTGATTTTGATATCTGGTAATTGACTTCGAAGAAAGATGCGTCTTTTGGATT +TGTAATGACTGGGCCTTAAGTTCATAGGTGTTATTACATGGAGGAACACATTACTTTGGTTGATGATGAT +GTTTTGATGATGACTTTCAATGTATGTGCTTATGTTAAGCCTGACATAAGAACTTACTAGTTTGCATAAT +GCAAAGGGTTAGTATATGATTTTTAGTATGTGGATTTTGACACTGCCTTTGATTAGGATGTGTGAATGAT +TTTGAAACATATTAAGATGTTTATACGAGCGTGTTGTTTACTATTTTCAGGATATGTGGAAGCGGTTGTG +TATGATCTATACGCACATTTAGTTCCCAGAGGGCGTTGTCGTCTCCCCCTAAGCAAGGGAGAAACACGTG +GCACATGATCTTGCGCTTAACGATAAAAATGGAATCTGATAAAAGTATGGCCTGTTTAAATAGAATTTTG +ATGAATAAGATGATGTTTGTGGAAGATAAGATCTCTACCCTTAAGATGGTTGCTGATTATTATCAAAAAG +AAGTAAAGTATGATTTTGATGCAGTTGAATCTCCCCGTGAGGCACCTGTATTTAGATGTACTTGTCGATT +CCTTGGTTATACCATTATGACTCAAGGCATCGGTAAGAAGAATCCGAAACAGGAAGCTGCACGTCAGATG +TTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTGGACCCGTTCAATCGCGCCCCGTGTATTATCGTT 
+ACAACGACCCTAGATATACACGGTTGGAAAAAGCTATTGAACGTCGAGACGATAAAATTAAAACATTAAT +TAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATTTATTCCCAAGGAATGTTTGATAAATTAACTAAA +CAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTGAACAGATGAATGGAAATTTGACTCGTATTTGTG +ATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAATATTCAAGCCACTGTGATTGATACAACAGACAA +ATATGTTTCTTTAAAAGAGGATATTATGAAGATTGTTTTAGTGATATTGCTTGTTCGTCTTTTAATGGTT +TGGAAGAAGTATCGTGCTTCTCTGTGTGTTATTTTAATCTTTATTTTTAAATTTTATGGATTCGATCAAA +AGTTGATTGATTTAATTATGGATTTGAAGAATAAAATATTTTCACAGGGTGCATTGGAAGATACAGTTGA +GGAGGTTGTATATCATCCTTGGTTCCATACGTGTGGAAAAATCATCTTTGCGGTTATGGCTTTCTTAACA +ATTAAGAAAATTCCTGGTAAACAGGATTGGGATAGTTACATAACACGTTTAGATCGTATCCCAAAATCTA +TTGAGGGAGCTAAAAAGATCACTGATTACTGTTCAGAATATTTTAATATTGCTAATGATCAGATCAAGAT +GATGGTTCTTGGAAAGACTAAAGAAGAATTGCAACGTGCTAATGGACTATATGGAGAAATTCAAGCTTGG +GCTCAAGAGGTTCGCCAGTATTTGGAATTGGATCAACGGAATAAAATTGATCTAGATACTGAAACCGCAA +ATCGTGTTGAACAACTTTGGATAAAGGGCTTGAAATTCAAGAGTGAACCCCTTTTGAGTAAGGAAATGTC +AGCTTTAGTTCATACAACTCTTTTACCAGCTAAGCAATTGTACGAGTATGTATCGTGTTCTCCTGTTAAA +GGGGGAGGACCACGTATGCGTCCAATTTGTTTATGGTTGGTAGGTGAATCAGGAGTTGGTAAGACTGAAA +TGGTATATCCATTGTGCATTGATGTTCTTCGGGAAATGGGGATGATTAAGAAAGATGATTTTCATCATCA +AGTTTATGGTCGTCAAGTTGAAACTGAATTCTGGGATGGTTATAAAGGACAGAAAATTGTCATTTATGAT +GATGCATTTCAGAAGAAAGATGACAAAACAGCAGCTAACCCAGAAATTTTTGAGGTTATTCGCTCTTGCA +ACACTTTTCCTCAGCATTTACATATGGCAGCTCTTCATGATAAAAATACTTTTTCTGCTGCTGAATTACT +CTTATATACCACTAATGATTATAATGTTAAGCTGGAATCTATTACTTTTCCCGATGCTTTCTTTAATCGT +ATGGGCGATATGGCTTATAAAGTTAGTCCTAAGAAAGAGTATGGTATTGAAACCGAGAAAGGGAATTCAG +GTAAAACTTATTTAAAATTGGATAAGAGTAAATTGGACAAAACAAAAGCTATTGACCTTTCAGTGTATGA +ATTCCAAAAAATTGTACGTGACGAGAAAAGTGATGCAGGTTGGATTGATTCTGGATCACCCTTGGACTAT +GAAGATTTTGCTAAATTAGTGTGTTCAAAATGGAAAGAAGCGAAACAATCTTCAATGAATAAATTGAAAT +TTTTGGAAGAATATGCTATTCGTGCTCAGGTTGGATCAGAAGAAAATTCTGAATATGGTGATTGTATAGA +TTTTGTCGATGATATTGCCAAACGCTTACAAAAAGGTGAAACTCTTGAAGAAATAGAGTTTGATTATGCC +TCAGATCCAGAGATGTTTACTCAATACTATCATTTTAAATCTACAATTAAACCGGCATCGCGTTGGCAGA 
+AGTATAAGGATCGGATGGACATTTGTTTGAGCGACTGTAAGACTTATTTAGCAAAGAAATACGAAGAAAT +TAAGAAAATTCTTGCCGAACATCCTATCTTGACGATTTTAGGAATGATAGGGGTTGCCTTATCTGCTCTG +GCAATGTACTATTGGTTTTCTAAATCGTTGGATCCTGTAGAAGCCGAGGTTGCTCCTTCTGGTGACGCTA +AAACAGTGCGCTTACCAAGGAAACTCGTTGAGATTGGTGCTTCTGGAGATGTTAAAACACAGAAGATTGT +GAAACCCGTTGTAGAGACCGAATGGCATCGTAACAATAAAGGAGAGATTGAAATTTCTTGTGATGAATGT +GGTATGCATAGGATGTCTGCATTTAACAATATGACAGATGAAGAATTTGATAACTGTACATATGAAGATT +TGAATAAGGACCAGAAACGTGAACTTGCCCAGTGGTCTACTAAAGATTCTTGGTTAGGTCGATTCTTTTT +GAGTCGAGATCGCAAGAATAAGGTTGGAATTTGGGCCGAAGTGGGACAATCAGGTGATGTTAAAACAAAT +AAAGCTCAGATTAAACGTGTTGAAGCTGGAGCCGAAGAATTAGTTACTGTTGCTTTAACTCAAGGTTGTT +CTGATGATGCTGCACACAATTTGATGATTGACGTTTTCCAAAAAAATACATATAGAATGTCATACTTCCG +TGGAGACAAGCGTTATCAACTTGGAAATTGTACATTTGTTCGTGGTTGGTCTTTTATTATGCCATATCAT +TTTGTACAGGCTGTGTTTGCGCGAAGATTGCCACCTAACACGATTATTTCTTTGTCCCAACAGATGTCTG +AAGATTTAATGCAAATTCCATTATCACACTTCTTTTCTGCTGGTGTTGACAATTTTTATTTAACAGATAA +TTGTGTGCGCTTGCCATTTAAAAATGGAGATTTTCGCGATTGTGTTATGGTTAATTTACATTCACGAATG +TGCACGCCACATCGTGATTTGGTTCGGCATTTTATTTTAACTTCTGATCAAGGTAAATTGAAGGGATCTT +TTAGTGGTGCAATGGCAACTTTCCATGTTAACAATATGGGTTTATATCGTGTTTATAATTGGCTAAATGC +AGTTCGTCCTTGCGATAAAAAGATAGAAATTTTCCACCCTGAAGATGGTTTTGAGTATCCCGAAGAATCA +TATATTCAACGTGACTGTTATGAATATAATGCACCGACTCGTACTGGGGATTGTGGATCTATTATTGGAT +TGTATAATAAATATTTAGAAAGGAAAATCATCGGTATGCACATTGCTGGAAATGATGCAGAAGAGCATGG +TTATGCGTGCCCTTTGACACAAGAGTGTCTTGAGACTGCTTTTTCTGCTTTAGTAAATAAAAATAAGAAG +AATATTTCCTCACAATTTTATTATGAAATACCCAATATGGTTGATCCACTCGGTGATAGTAGTGTTCCTG +AAGGTAAGTTTTACGCTTTAGGAAAGTCATCTATTCGTGTGGGACAGGCAGTTAATTCGTCCATAATTCC +TTCTCGAATTTATGGGAAGTTGTCTGTTCCTACAATGAAACCAGCACTACTCAAGCCAACGATTCTGAAT +AACAAAGTACATAATCCTTTATTGTCGGGACTTAAGAAATGTGGTGTAGACACTGCGGTCTTGAGTGATG +ATGAAGTTTTGAGTGCTTCACAAGATGTTTGTCGTGTTATGTTGAACCAATATAATAAAAATTTGAATAA +AACAAAGTATCAACGCATTTTAACATATGAGGAAGCTATTCGTGGAACCCAAGATGATGAATTTATGTGT +GCTATTAATCGTACAACGTCACCAGGATTTCCTTATGCACAAATGAAAAGAAATGCTCCAGGTAAACAGC 
+AATGGATGGGTTTTGGTGAAGAATTTGATTTTACAAGTAATTATGCACTAGCTTTGCGGAAAGATGTTGA +ACAACTCATTGAAGATTGTGCTAGCGGAAAAATATCTAATGTCATTTTTGTAGATACATTGAAAGATGAG +CGACGCGATATAGCTAAAGTAAATGTAGGTAAAACGCGTGTATTTTCTGCAGGTCCTCAACATTTTGTAG +TTGCATTTCGCCAATATTTCCTACCTTTTGCTGCTTGGTTGATGCATAATCGCATTTCGAACGAAGTAGC +AGTTGGCACTAATGTTTATTCATCTGATTGGGAACGTATTGCAAAACGTCTTAAAACAAAAGGTAGTCAC +GTCATTGCGGGGGACTTTGGAAATTTCGATGGATCTTTAGTAGCACAAATTTTGTGGGCCATATTTTGGG +AAATTTTTGTTGTATGGCTTAAGCAATTTATTGATATAGAGAATTCAGAAGGAAAACGTATTTTATGTAT +CTGTCTTGGTTTGTGGTCACATTTAGTTCACTCTGTTCATATTTATGAAGATAATGTATATATGTGGACT +CATTCTCAACCTTCTGGCAATCCTTTCACTGTTATTATTAATTGCTTGTATAATTCGATTATTATGCGAC +TGTCATGGATTCGTGTGATGGAGAAATTTCAACCTAGACTTAAGTCCATGAAGTGGTTCAACGAATATGT +CGCCTTGATAACATATGGTGACGACAATGTTTTAAACATTGATGCAAAGGTTGTGGAATGGTTTAATCAG +ATTAACATTAGTGAGGTTATGACTGAAATGCGACATGAATATACGGACGAAGCTAAAACTGGTGATATTG +TTAAATCTCGTAAATTAGAAGATATTTTCTTTTTGAAGAGAAAATTTCGTTTTAGCCCAGAATTACAACG +CCATGTTGCTCCATTGAAGATCGAAGTTATTTATGAAATGTTGAATTGGTCTCGCCGCTCTATAGATCCA +GATGAAATCTTGATGTCGAACATTGAAACGGCTTTTCGTGAAGTAGTTTACCACGGAAAAGAAGAATACG +ATAAACTAAGGTCAGCGGTATTGGCGTTGAAGGTACCCCAGGAACTTCCTGAAAACCCTCAGATTTTGAC +GTACAACCAATATTTGCACGATATTGAATATCTTGCGGACCCTTTGTACGACTTTTAGTTAAGATGTGAT +CTTGCTTCCTTATACAATTTTGAGAGGTTAATAAGAAGGAAGTAGTGCTATCTTAATAATTAGGTTAACT +ATTTAGTTTTACTGTTCAGGATGCCTATTGGCAGCCCCATAATATCCAGGACACCCTCTCTGCTTCTTAT +ATGATTAGGTTGTCATTTAGAATAAGAAAATAACCTGCTAACTTTCAAACAAATAATAATAACATTGAAA +ATGAAGATCGGAAAATTACTTCCGAGCAAAAAGAGATTGTACACTTTTCTAGTGAAGGAGTTACCCCTAG +TACCACTGCGGTGCCTGATATCGTTAGTCTTTCAACAGATTATTTGTCTATGACTACTCGTGAAGATCGT +ATCCACACGATTAAAGATTTTCTTTCTCGTCCAATTATAATTCAAACTGGTCTTTGGTCTTCCGCTACAA +CTGCCGAAACTCAATTGTATACTGCTAATTTCCCTGAAGTGTTCATTTCTAATACTATGTATCAAGAAAA +GTTGCGTGGGTTCGTGGGTTTGCGAGCAACTTTAGTCATTAAAGTGCAAGTGAATTCCCAACCTTTCCAG +CAAGGACGATTGATGCTACAGTATTATCCGTATGCACAGTATATGCCTAACCGTGTTTCTTTGGTGAATT +CCACTCTCCAAGGACGCTCTGGTTGTCCTCGAACAGATTTGGATTTGAGCGTTGGTACGGAAGTTGAAAT 
+GCGAATTCCTTATGTGTCCCCTCATGTATATTACAATCTTATTACTGGACAAGGATCATTTGGCGCTATA +TATTTGGTTGTATATAGCCAACTAAGAGATCAAGTTACAGGAACAGGTTCTGTTGAATATACTGTTTGGG +CTCATTTGGAAGATGTAGATGTGCAATACCCGACCGGTGCAAACATTTTCACGGGTAGCTCTCCAAATTT +TGCCTCTTTGGGTCAGAAAATGAGTGATGGAAAATTCACTGAAAAAGACTTGAGAGATATTTGGACTTCA +AAAGCGTACAATAAACAACCAGACAAAATTTTCGCACAAGTGGCTTCTGAAATAACACAACTCAAAGAAT +CAGGAACAATTAGTTCTGGAATTGGACAAGTTTCTGAAGGTCTTTCTACCATGTCTAAAATCCCTATACT +CGGAAATATGTTTACAAAACCCGCCTGGATTTCAGCTCAAGTATCTAATATCTTCAAGATGCTTGGTTTT +TCAAAACCCACTGTTCAAGGTCTTCCTTGTGAATCGAAACTGCGTGGTCAAGTTCGAATGGCGAATTTTG +ATGGCGCTGATACATCACATAAATTGGCTTTGTCTGCCCAAAACGAAATTGAAACAAAATCTGGACTTTC +TGGAACTTCTCCTGATGAAATGGATTTATCACACGTCCTTTCCATACCAAATTTTTGGGATCGTTTTACT +TGGAACACAACCGATGCCACTAGTTCTATTTTATGGGATAATTATGTTACACCAATGAAAATTAAACCAT +ATTCCTCTACAATATTAGATAGATTTAGATGCACTCATATGGGTTTTGTAGCCAACACACACGGTTATTG +GTGTGGATCAATAGTTTATACTTTTAAATTTGTTAAGACTCAATTTCATTCTGGACGTTTACGCATTAGT +TTTATTCCATTTTATTATAATACGACTATATCTGCAGGAGTTCCCGATGTTTCTCGTACCCAAAAAGTAA +TCGTTGATCTGCGCACCTCTACAGAAGTCTCTTTCACTATTCCGTATGTGTCTTCACGACCTTGGATGTA +CTGTATTCGTCCTGAAGCTTCGTGGCTTGGAACCGATAATGCTTTGATGTACAACGCCGTTACGGGTATA +GTGAGAGTTGAGGTTCTTAACCAGTTGGTTGCCGCTAACAACGTGTTTCAATCTATAGACACTATTGTTG +AAGTTAGTGGTGGTCCTGATTTAACTTTTGCAGCACCAATGGCTCCCTCTTATGTTCCTTATTCTGGAGG +TTTTACTTTAGCAGATGATGCGGCAGCAAAGAAACAGCGTGAGGAGGAGTATGACAACAACATACCTCAA +ACTATTTCTAATCGTGGAAAACGTGAGGTTGAAGATGCTCGTATTGTTGCGCAAGTAATGGGTGAAGATT +TAGCTATTCAAAGAAACGATGCTCAACATGGTGTTCATCCAATGACTATAGACACTCATAAGATCGACTC +AAATTGGTCTCCGGAAGCGCATTGTATTGGTGAAAAGATTATGTCTATTCGCCAATTGATTAAGCGTTTT +GGCATGGCTTTGAACTCCTTGAATTTGATAAGTGATGCACCAAACACCTTGATAGCACCATTTTCAGTTC +AGCACCCAACTCCTGTTGTTGCCCCTGCTGAACCCATGTCCCTTTTTGAATATTATTATTTCATTTATGG +ATTTTGGAGAGGTGGCATGAGATTTAAACTTCAGGCAGTACGTACAAACTCAGCAGAAACATCAGTTAAA +ACCGACACAACTTGGACTGTAAATTTGTGGAATTCTGTACAAGATTCTTTTAATTCTCTAATTAATGTAT +TTAGTACTACTGATTACCCTATAAAATCCACAGGAGCACTTCCAGCCGGAACAAGCGGTTTTGGCAATTC 
+GATGACGTATATAGATCCTGAGGTTGAAGGTTTTATGGAATTTGAGATTCCATATTATAATATCTCCCAT +ATTTCTCCAGCTACAACCTATGTTCGTGGTACTGAATCTCCTATTACAATTAATAGTGTCTTGCGTGGAC +ATTTGCCACCACAAATTGTGGCTGTTGCACCACAGGGCACTATTGCCACTACAGATGTAGTGAACGCTCA +ATTTGCTCGTGCTCCTTCTGACGACTTTTCATTTATGTATCTCGTTGGTGTTCCACCACTTACCAACGTC +GCTCGTCCCTAACTCCCTTACTATTCTGGATCCTTTAAAATTTATTAGGATAGACAAAAATTAACTCTAT +ATTAGATAGTATTAGATTAAGTTTCTTTTTGGTTTTGGGTTTTATTCAGTAACTATCTGCCCTGCTTACA +CGGGTATTATTTTTAATTCTTGTCCCTTCTGGACTCTTTTATTTTGTATTTTCAAAATTTTTACTAATTT +TTAGTCAGAGTCCTTAGGGGCTACCAGGTTTTTCGCAATTTTCCTGCTTACTGACAGTAATTGCAATTTC +GAATTAAAATAATAGTTGTTTTCT +>gi|300871995|gb|GU983911.2|_Drosophila_C_virus_isolate_ZW122_polyprotein_gene,_partial_cds +CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAATTGCGTCGACAAATCAAAAATAGGAAAATT +TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTCCCCGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT +TTGGTGATAT +>gi|300871993|gb|GU983910.2|_Drosophila_C_virus_isolate_Ez10_polyprotein_gene,_partial_cds +CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAATTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT +TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTCCCTGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT +TTGGTGATAT +>gi|300871991|gb|GU983909.2|_Drosophila_C_virus_isolate_RG7_polyprotein_gene,_partial_cds +CGAGGCACCTGTAGTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAATTCAAGGCATCGGTAAG 
+AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT +TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGAT +>gi|300871989|gb|GU983908.2|_Drosophila_C_virus_isolate_RC18_polyprotein_gene,_partial_cds +TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT +TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAGATGTGTTTCTTTAAAAGAAGATATCATGAAGATTGTT +TTAGTTATAT +>gi|300871987|gb|GU983906.2|_Drosophila_C_virus_isolate_Kn134_polyprotein_gene,_partial_cds +TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT +TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGGATACTTTTCCCGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATAC +>gi|300871985|gb|GU983905.2|_Drosophila_C_virus_isolate_KN5_polyprotein_gene,_partial_cds +TGAGGCACCTGTGTTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT 
+TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA +TATCCAAGCCACTGTGATTGATACAACAGACAGATGTGTTTCTTTAAAAGAAGATATCATGAAGATTGTT +TTAGTTATAT +>gi|300871983|gb|GU983902.2|_Drosophila_C_virus_isolate_Hi99.18_polyprotein_gene,_partial_cds +CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGGGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT +TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTCCCCGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT +TTGGTGATAT +>gi|300871981|gb|GU983901.2|_Drosophila_C_virus_isolate_FSP_polyprotein_gene,_partial_cds +CATTATGACTCAAGGCATCGGTAAGAAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTA +TCAGGAGATGTTGAGACTAACCCTGGACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTA +GATATACACGGTTGGAAAAAGCTATTGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCG +TCGACAAATCAAAAATCGGAAAATTTATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGAT +GGGATAAAAGATGGTGTTGGCTCTGAACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGA +ATACTCTTCCCGGGTTACAAGCAAATATTCAAGCCACTGTGATTGATACAACAGACAGATGTGTTTCTTT +AAAAGAAGATATCATGAA +>gi|300871979|gb|GU983900.2|_Drosophila_C_virus_isolate_AL7_polyprotein_gene,_partial_cds +TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGATTATACCATTATGACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCGCGTCAAATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACATTAATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT +TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAAATATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT +TTAGTGATAT 
+>gi|300871977|gb|GU983899.2|_Drosophila_C_virus_isolate_AkGB1_polyprotein_gene,_partial_cds +CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAATTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT +TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT +TTGGTGATAT +>gi|300871975|gb|GU983898.2|_Drosophila_C_virus_isolate_vir_polyprotein_gene,_partial_cds +CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAATTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT +TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTCCCTGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT +TTGGTGATAT +>gi|300871973|gb|GU983897.2|_Drosophila_C_virus_isolate_G96.232_polyprotein_gene,_partial_cds +TTCAAGGCATCGGTAAGAAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGA +TGTTGAAACTAACCCTGGACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACA +CGGTTGGAAAAAGCTATTGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAA +TCAAAAATAGGAAAATTTATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAA +AGATGGTGTTGGCTCTGAACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTC +CCTGGGTTACAAGCAAATATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAG +ATATTATGAAG +>gi|300871971|gb|GU983896.2|_Drosophila_C_virus_isolate_G96.45_polyprotein_gene,_partial_cds +CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAATTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG 
+GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT +TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTCCCTGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT +TT +>gi|300871969|gb|GU983894.2|_Drosophila_C_virus_isolate_16a10_polyprotein_gene,_partial_cds +CTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTGGACCCGTTCAATCGCGCCC +CGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTATTGAACGTCGAGACGATAAA +ATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATTTATTCCCAAGGGATGTTTG +ATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTGAACAGATGAATGGAAATTT +GACTCGTATTTGTGATTTCCTAGAGAACACTCTCCCCGGGTTACAAGCAAATATTCAAGCCACTGTGATT +GATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTTTTGGTGATAT +>gi|300871967|gb|GU983893.2|_Drosophila_C_virus_isolate_Tana11_polyprotein_gene,_partial_cds +TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT +TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAGATGTGTTTCTTTAAAAGAAGATATCATGAAGATTGTT +TTAGTTATAT +>gi|300871965|gb|GU983892.2|_Drosophila_C_virus_isolate_psjmg_polyprotein_gene,_partial_cds +TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCGCGTCAAATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACATTAATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT +TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG 
+AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAAATATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT +TTAGTGATAT +>gi|300871963|gb|GU983891.2|_Drosophila_C_virus_isolate_PS94_polyprotein_gene,_partial_cds +TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT +TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAGATGTGTTTCTTTAAAAGAAGATATCATGAAGATTGTT +TTAGTTATAT +>gi|300871961|gb|GU983890.2|_Drosophila_C_virus_isolate_Baf153_polyprotein_gene,_partial_cds +TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT +TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAGATGTGTTTCTTTAAAAGAAGATATCATGAAGATTGTT +TTAGTTATAT +>gi|300871959|gb|GU983889.2|_Drosophila_C_virus_isolate_Bam73_L_polyprotein_gene,_partial_cds +TGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT +TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA +TATCCAAGCCACTGTGATTGATACAACAGACAGGTGTGTTTCTTTAAAAGAAGATATCATGAAGATTGTT +TTAGTGATAT 
+>gi|300871957|gb|GU983888.2|_Drosophila_C_virus_isolate_Bam73_H_polyprotein_gene,_partial_cds +TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCGCGTCAAATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACATTAATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT +TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAAATATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT +TTAGTGATAT +>gi|300871955|gb|GU983885.2|_Drosophila_C_virus_isolate_16a9_polyprotein_gene,_partial_cds +GTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATGACTCAAGGCATCGGTAAGAAGAATCCAA +AACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTGGACCCGTTCA +ATCGCACCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTATTGAACGTCGA +GACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATTAAAAATAGGAAAATTTACTCCCAAG +GAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTGAACAGATGAA +TGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAATATTCAAGCC +ACTGTGATTGATACAACAGACAAATATGTTTCTTTAAAAGAAGATATTATGAAGATTGTTTTAGTGATAT +>gi|300871953|gb|GU983884.2|_Drosophila_C_virus_isolate_Tam15_polyprotein_gene,_partial_cds +CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG +AAGAATCCAAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG +GACCCGTTCAATCGCACCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATTAAAAATAGGAAAATT +TACTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAAATATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT +TTAGTGATAT +>gi|300871951|gb|GU983883.2|_Drosophila_C_virus_isolate_Tam11_polyprotein_gene,_partial_cds +TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG 
+AAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT +TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA +TATCCAAGCCACTGTGATTGATACAACAGACAGATGTGTTTCTTTAAAAGAAGATATCATGAAGATTGTT +TTAGTTATAT +>gi|300871949|gb|GU983882.2|_Drosophila_C_virus_isolate_ZW141_polyprotein_gene,_partial_cds +TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCGCGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACATTGATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT +TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTAACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA +TATCCAAGCCACTGTGATTGATACAACAGACAGATGTGTTTCTTTAAAAGAAGATATCATGAAGATTGTT +TTAGTTATAT +>gi|300871947|gb|GU983881.2|_Drosophila_C_virus_isolate_HB2_polyprotein_gene,_partial_cds +CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAATTGCGTCGACAAATCAAAAATAGGAAAATT +TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTCCCCGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT +TTGGTGATAT +>gi|300871945|gb|GU983880.2|_Drosophila_C_virus_isolate_Safr10_polyprotein_gene,_partial_cds +CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT 
+TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT +TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAA +TATTCAAGCCACTGTGAT +>gi|300871943|gb|GU983879.2|_Drosophila_C_virus_isolate_mel15_L_polyprotein_gene,_partial_cds +TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGGTTATACCATTATGACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATT +TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAATACTCTTCCCGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAAGTGTGTTTCTTTAAAAGAAGATATTATGAAGATTGTT +TTAGTGATAT +>gi|300871941|gb|GU983878.2|_Drosophila_C_virus_isolate_mel15_H_polyprotein_gene,_partial_cds +TGAGGCACCTGTATTTAAATGTACTTGTAGATTTCTTGATTATACCATTATGACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCGCGTCAAATGTTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACATTAATTAAAGAGTTGCGTCGACAAATCAAAAATCGGAAAATT +TATTCCCAAGGAATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAA +TATTCAAGCCACTGTGATTGATACAACAGACAAATATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT +TTAGTGATAT +>gi|300871939|gb|GU983877.2|_Drosophila_C_virus_isolate_Linz44_polyprotein_gene,_partial_cds +CGAGGCACCTGTATTTAAATGTACTTGTAGATTCCTTGGTTATACCATTATAACTCAAGGCATCGGTAAG +AAGAATCCGAAACAGGAAGCTGCACGTCAGATGTTGCTCTTGTTATCAGGAGATGTTGAAACTAACCCTG +GACCCGTTCAATCGCGCCCCGTGTATTATCGCTACAACGACCCTAGATATACACGGTTGGAAAAAGCTAT +TGAACGTCGAGACGATAAAATTAAAACACTAATTAAAGAATTGCGTCGACAAATCAAAAATAGGAAAATT +TATTCCCAAGGGATGTTTGATAAATTAACTAAACAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTG +AACAGATGAATGGAAATTTGACTCGTATTTGTGATTTCCTAGAGAACACTCTCCCCGGGTTACAAGCAAA 
+TATTCAAGCCACTGTGATTGATACAACAGACAAGTATGTTTCTTTAAAAGAAGATATTATGAAGATTGTT +TTGGTGATAT +>gi|2388672|gb|AF014388.1|_Drosophila_C_virus_strain_EB,_complete_genome +TTTATATCGTGTGTACATATAAATATGTACACACGGCTTTTAGGTAGAATATTGTTTTCAATGTTGATTT +TAAAGGTAACTTTGGTTATTATGCTTTACGGTTTTCATTGTTGATGGTATTTGTGGCCTGCGGTCCCTAA +TTGTTGAATTATTTATTCTGATACGTTGTTTTCATTGTTGATGGTAAGGATTCTTATTTTGAAGTGGTTT +TTCAGAAGATAACTCTAAATATGAATTATGCCTTATTGTTTTCAATGTTGATGGCCTTCGTTTAAATACT +CTTTGTTAATGACGGTAATCAAAGATTACATCTCAAACTTAGATTAATATTTTTAAGTAGGGTATACTGA +GTTAGTCCTCTCTCTTTACTGATTTTGATATCTGGTAATTGACTTCGAAGAAAGATGCGTCTTTTGGATT +TGTAATGACTGGGCCTTAAGTTCATAGGTGTTATTACATGGAGGAACACATTACTTTGGTTGATGATGAT +GTTTTGATGATGACTTTCAATGTATGTGCTTATGTTAAGCCTGACATAAGAACTTACTAGTTTGCATAAT +GCAAAGGGTTAGTATATGATTTTTAGTATGTGGATTTTGACACTGCCTTTGATTAGGATGTGTGAATGAT +TTTGAAACATATTAAGATGTTTATACGAGCGTGTTGTTTACTATTTTCAGGATATGTGGAAGCGGTTGTG +TATGATCTATACGCACATTTAGTTCCCAGAGGGCGTTGTCGTCTCCCCCTAAGCAAGGGAGAAACACGTG +GCACATGATCTTGCGCTTAACGATAAAAATGGAATCTGATAAAAGTATGGCCTGTTTAAATAGAATTTTG +ATGAATAAGATGATGTTTGTGGAAGATAAGATCTCTACCCTTAAGATGGTTGCTGATTATTATCAAAAAG +AAGTAAAGTATGATTTTGATGCAGTTGAATCTCCCCGTGAGGCACCTGTATTTAGATGTACTTGTCGATT +CCTTGGTTATACCATTATGACTCAAGGCATCGGTAAGAAGAATCCGAAACAGGAAGCTGCACGTCAGATG +TTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTGGACCCGTTCAATCGCGCCCCGTGTATTATCGTT +ACAACGACCCTAGATATACACGGTTGGAAAAAGCTATTGAACGTCGAGACGATAAAATTAAAACATTAAT +TAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATTTATTCCCAAGGAATGTTTGATAAATTAACTAAA +CAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTGAACAGATGAATGGAAATTTGACTCGTATTTGTG +ATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAATATTCAAGCCACTGTGATTGATACAACAGACAA +ATATGTTTCTTTAAAAGAGGATATTATGAAGATTGTTTTAGTGATATTGCTTGTTCGTCTTTTAATGGTT +TGGAAGAAGTATCGTGCTTCTCTGTGTGTTATTTTAATCTTTATTTTTAAATTTTATGGATTCGATCAAA +AGTTGATTGATTTAATTATGGATTTGAAGAATAAAATATTTTCACAGGGTGCATTGGAAGATACAGTTGA +GGAGGTTGTATATCATCCTTGGTTCCATACGTGTGGAAAAATCATCTTTGCGGTTATGGCTTTCTTAACA +ATTAAGAAAATTCCTGGTAAACAGGATTGGGATAGTTACATAACACGTTTAGATCGTATCCCAAAATCTA 
+TTGAGGGAGCTAAAAAGATCACTGATTACTGTTCAGAATATTTTAATATTGCTAATGATCAGATCAAGAT +GATGGTTCTTGGAAAGACTAAAGAAGAATTGCAACGTGCTAATGGACTATATGGAGAAATTCAAGCTTGG +GCTCAAGAGGTTCGCCAGTATTTGGAATTGGATCAACGGAATAAAATTGATCTAGATACTGAAACCGCAA +ATCGTGTTGAACAACTTTGGATAAAGGGCTTGAAATTCAAGAGTGAACCCCTTTTGAGTAAGGAAATGTC +AGCTTTAGTTCATACAACTCTTTTACCAGCTAAGCAATTGTACGAGTATGTATCGTGTTCTCCTGTTAAA +GGGGGAGGACCACGTATGCGTCCAATTTGTTTATGGTTGGTAGGTGAATCAGGAGTTGGTAAGACTGAAA +TGGTATATCCATTGTGCATTGATGTTCTTCGGGAAATGGGGATGATTAAGAAAGATGATTTTCATCATCA +AGTTTATGGTCGTCAAGTTGAAACTGAATTCTGGGATGGTTATAAAGGACAGAAAATTGTCATTTATGAT +GATGCATTTCAGAAGAAAGATGACAAAACAGCAGCTAACCCAGAAATTTTTGAGGTTATTCGCTCTTGCA +ACACTTTTCCTCAGCATTTACATATGGCAGCTCTTCATGATAAAAATACTTTTTCTGCTGCTGAATTACT +CTTATATACCACTAATGATTATAATGTTAAGCTGGAATCTATTACTTTTCCCGATGCTTTCTTTAATCGT +ATGGGCGATATGGCTTATAAAGTTAGTCCTAAGAAAGAGTATGGTATTGAAACCGAGAAAGGGAATTCAG +GTAAAACTTATTTAAAATTGGATAAGAGTAAATTGGACAAAACAAAAGCTATTGACCTTTCAGTGTATGA +ATTCCAAAAAATTGTACGTGACGAGAAAAGTGATGCAGGTTGGATTGATTCTGGATCACCCTTGGACTAT +GAAGATTTTGCTAAATTAGTGTGTTCAAAATGGAAAGAAGCGAAACAATCTTCAATGAATAAATTGAAAT +TTTTGGAAGAATATGCTATTCGTGCTCAGGTTGGATCAGAAGAAAATTCTGAATATGGTGATTGTATAGA +TTTTGTCGATGATATTGCCAAACGCTTACAAAAAGGTGAAACTCTTGAAGAAATAGAGTTTGATTATGCC +TCAGATCCAGAGATGTTTACTCAATACTATCATTTTAAATCTACAATTAAACCGGCATCGCGTTGGCAGA +AGTATAAGGATCGGATGGACATTTGTTTGAGCGACTGTAAGACTTATTTAGCAAAGAAATACGAAGAAAT +TAAGAAAATTCTTGCCGAACATCCTATCTTGACGATTTTAGGAATGATAGGGGTTGCCTTATCTGCTCTG +GCAATGTACTATTGGTTTTCTAAATCGTTGGATCCTGTAGAAGCCGAGGTTGCTCCTTCTGGTGACGCTA +AAACAGTGCGCTTACCAAGGAAACTCGTTGAGATTGGTGCTTCTGGAGATGTTAAAACACAGAAGATTGT +GAAACCCGTTGTAGAGACCGAATGGCATCGTAACAATAAAGGAGAGATTGAAATTTCTTGTGATGAATGT +GGTATGCATAGGATGTCTGCATTTAACAATATGACAGATGAAGAATTTGATAACTGTACATATGAAGATT +TGAATAAGGACCAGAAACGTGAACTTGCCCAGTGGTCTACTAAAGATTCTTGGTTAGGTCGATTCTTTTT +GAGTCGAGATCGCAAGAATAAGGTTGGAATTTGGGCCGAAGTGGGACAATCAGGTGATGTTAAAACAAAT +AAAGCTCAGATTAAACGTGTTGAAGCTGGAGCCGAAGAATTAGTTACTGTTGCTTTAACTCAAGGTTGTT 
+CTGATGATGCTGCACACAATTTGATGATTGACGTTTTCCAAAAAAATACATATAGAATGTCATACTTCCG +TGGAGACAAGCGTTATCAACTTGGAAATTGTACATTTGTTCGTGGTTGGTCTTTTATTATGCCATATCAT +TTTGTACAGGCTGTGTTTGCGCGAAGATTGCCACCTAACACGATTATTTCTTTGTCCCAACAGATGTCTG +AAGATTTAATGCAAATTCCATTATCACACTTCTTTTCTGCTGGTGTTGACAATTTTTATTTAACAGATAA +TTGTGTGCGCTTGCCATTTAAAAATGGAGATTTTCGCGATTGTGTTATGGTTAATTTACATTCACGAATG +TGCACGCCACATCGTGATTTGGTTCGGCATTTTATTTTAACTTCTGATCAAGGTAAATTGAAGGGATCTT +TTAGTGGTGCAATGGCAACTTTCCATGTTAACAATATGGGTTTATATCGTGTTTATAATTGGCTAAATGC +AGTTCGTCCTTGCGATAAAAAGATAGAAATTTTCCACCCTGAAGATGGTTTTGAGTATCCCGAAGAATCA +TATATTCAACGTGACTGTTATGAATATAATGCACCGACTCGTACTGGGGATTGTGGATCTATTATTGGAT +TGTATAATAAATATTTAGAAAGGAAAATCATCGGTATGCACATTGCTGGAAATGATGCAGAAGAGCATGG +TTATGCGTGCCCTTTGACACAAGAGTGTCTTGAGACTGCTTTTTCTGCTTTAGTAAATAAAAATAAGAAG +AATATTTCCTCACAATTTTATTATGAAATACCCAATATGGTTGATCCACTCGGTGATAGTAGTGTTCCTG +AAGGTAAGTTTTACGCTTTAGGAAAGTCATCTATTCGTGTGGGACAGGCAGTTAATTCGTCCATAATTCC +TTCTCGAATTTATGGGAAGTTGTCTGTTCCTACAATGAAACCAGCACTACTCAAGCCAACGATTCTGAAT +AACAAAGTACATAATCCTTTATTGTCGGGACTTAAGAAATGTGGTGTAGACACTGCGGTCTTGAGTGATG +ATGAAGTTTTGAGTGCTTCACAAGATGTTTGTCGTGTTATGTTGAACCAATATAATAAAAATTTGAATAA +AACAAAGTATCAACGCATTTTAACATATGAGGAAGCTATTCGTGGAACCCAAGATGATGAATTTATGTGT +GCTATTAATCGTACAACGTCACCAGGATTTCCTTATGCACAAATGAAAAGAAATGCTCCAGGTAAACAGC +AATGGATGGGTTTTGGTGAAGAATTTGATTTTACAAGTAATTATGCACTAGCTTTGCGGAAAGATGTTGA +ACAACTCATTGAAGATTGTGCTAGCGGAAAAATATCTAATGTCATTTTTGTAGATACATTGAAAGATGAG +CGACGCGATATAGCTAAAGTAAATGTAGGTAAAACGCGTGTATTTTCTGCAGGTCCTCAACATTTTGTAG +TTGCATTTCGCCAATATTTCCTACCTTTTGCTGCTTGGTTGATGCATAATCGCATTTCGAACGAAGTAGC +AGTTGGCACTAATGTTTATTCATCTGATTGGGAACGTATTGCAAAACGTCTTAAAACAAAAGGTAGTCAC +GTCATTGCGGGGGACTTTGGAAATTTCGATGGATCTTTAGTAGCACAAATTTTGTGGGCCATATTTTGGG +AAATTTTTGTTGTATGGCTTAAGCAATTTATTGATATAGAGAATTCAGAAGGAAAACGTATTTTATGTAT +CTGTCTTGGTTTGTGGTCACATTTAGTTCACTCTGTTCATATTTATGAAGATAATGTATATATGTGGACT +CATTCTCAACCTTCTGGCAATCCTTTCACTGTTATTATTAATTGCTTGTATAATTCGATTATTATGCGAC 
+TGTCATGGATTCGTGTGATGGAGAAATTTCAACCTAGACTTAAGTCCATGAAGTGGTTCAACGAATATGT +CGCCTTGATAACATATGGTGACGACAATGTTTTAAACATTGATGCAAAGGTTGTGGAATGGTTTAATCAG +ATTAACATTAGTGAGGTTATGACTGAAATGCGACATGAATATACGGACGAAGCTAAAACTGGTGATATTG +TTAAATCTCGTAAATTAGAAGATATTTTCTTTTTGAAGAGAAAATTTCGTTTTAGCCCAGAATTACAACG +CCATGTTGCTCCATTGAAGATCGAAGTTATTTATGAAATGTTGAATTGGTCTCGCCGCTCTATAGATCCA +GATGAAATCTTGATGTCGAACATTGAAACGGCTTTTCGTGAAGTAGTTTACCACGGAAAAGAAGAATACG +ATAAACTAAGGTCAGCGGTATTGGCGTTGAAGGTACCCCAGGAACTTCCTGAAAACCCTCAGATTTTGAC +GTACAACCAATATTTGCACGATATTGAATATCTTGCGGACCCTTTGTACGACTTTTAGTTAAGATGTGAT +CTTGCTTCCTTATACAATTTTGAGAGGTTAATAAGAAGGAAGTAGTGCTATCTTAATAATTAGGTTAACT +ATTTAGTTTTACTGTTCAGGATGCCTATTGGCAGCCCCATAATATCCAGGACACCCTCTCTGCTTCTTAT +ATGATTAGGTTGTCATTTAGAATAAGAAAATAACCTGCTAACTTTCAAACAAATAATAATAACATTGAAA +ATGAAGATCGGAAAATTACTTCCGAGCAAAAAGAGATTGTACACTTTTCTAGTGAAGGAGTTACCCCTAG +TACCACTGCGGTGCCTGATATCGTTAGTCTTTCAACAGATTATTTGTCTATGACTACTCGTGAAGATCGT +ATCCACACGATTAAAGATTTTCTTTCTCGTCCAATTATAATTCAAACTGGTCTTTGGTCTTCCGCTACAA +CTGCCGAAACTCAATTGTATACTGCTAATTTCCCTGAAGTGTTCATTTCTAATACTATGTATCAAGAAAA +GTTGCGTGGGTTCGTGGGTTTGCGAGCAACTTTAGTCATTAAAGTGCAAGTGAATTCCCAACCTTTCCAG +CAAGGACGATTGATGCTACAGTATTATCCGTATGCACAGTATATGCCTAACCGTGTTTCTTTGGTGAATT +CCACTCTCCAAGGACGCTCTGGTTGTCCTCGAACAGATTTGGATTTGAGCGTTGGTACGGAAGTTGAAAT +GCGAATTCCTTATGTGTCCCCTCATGTATATTACAATCTTATTACTGGACAAGGATCATTTGGCGCTATA +TATTTGGTTGTATATAGCCAACTAAGAGATCAAGTTACAGGAACAGGTTCTGTTGAATATACTGTTTGGG +CTCATTTGGAAGATGTAGATGTGCAATACCCGACCGGTGCAAACATTTTCACGGGTAGCTCTCCAAATTT +TGCCTCTTTGGGTCAGAAAATGAGTGATGGAAAATTCACTGAAAAAGACTTGAGAGATATTTGGACTTCA +AAAGCGTACAATAAACAACCAGACAAAATTTTCGCACAAGTGGCTTCTGAAATAACACAACTCAAAGAAT +CAGGAACAATTAGTTCTGGAATTGGACAAGTTTCTGAAGGTCTTTCTACCATGTCTAAAATCCCTATACT +CGGAAATATGTTTACAAAACCCGCCTGGATTTCAGCTCAAGTATCTAATATCTTCAAGATGCTTGGTTTT +TCAAAACCCACTGTTCAAGGTCTTCCTTGTGAATCGAAACTGCGTGGTCAAGTTCGAATGGCGAATTTTG +ATGGCGCTGATACATCACATAAATTGGCTTTGTCTGCCCAAAACGAAATTGAAACAAAATCTGGACTTTC 
+TGGAACTTCTCCTGATGAAATGGATTTATCACACGTCCTTTCCATACCAAATTTTTGGGATCGTTTTACT +TGGAACACAACCGATGCCACTAGTTCTATTTTATGGGATAATTATGTTACACCAATGAAAATTAAACCAT +ATTCCTCTACAATATTAGATAGATTTAGATGCACTCATATGGGTTTTGTAGCCAACACACACGGTTATTG +GTGTGGATCAATAGTTTATACTTTTAAATTTGTTAAGACTCAATTTCATTCTGGACGTTTACGCATTAGT +TTTATTCCATTTTATTATAATACGACTATATCTGCAGGAGTTCCCGATGTTTCTCGTACCCAAAAAGTAA +TCGTTGATCTGCGCACCTCTACAGAAGTCTCTTTCACTATTCCGTATGTGTCTTCACGACCTTGGATGTA +CTGTATTCGTCCTGAAGCTTCGTGGCTTGGAACCGATAATGCTTTGATGTACAACGCCGTTACGGGTATA +GTGAGAGTTGAGGTTCTTAACCAGTTGGTTGCCGCTAACAACGTGTTTCAATCTATAGACACTATTGTTG +AAGTTAGTGGTGGTCCTGATTTAACTTTTGCAGCACCAATGGCTCCCTCTTATGTTCCTTATTCTGGAGG +TTTTACTTTAGCAGATGATGCGGCAGCAAAGAAACAGCGTGAGGAGGAGTATGACAACAACATACCTCAA +ACTATTTCTAATCGTGGAAAACGTGAGGTTGAAGATGCTCGTATTGTTGCGCAAGTAATGGGTGAAGATT +TAGCTATTCAAAGAAACGATGCTCAACATGGTGTTCATCCAATGACTATAGACACTCATAAGATCGACTC +AAATTGGTCTCCGGAAGCGCATTGTATTGGTGAAAAGATTATGTCTATTCGCCAATTGATTAAGCGTTTT +GGCATGGCTTTGAACTCCTTGAATTTGATAAGTGATGCACCAAACACCTTGATAGCACCATTTTCAGTTC +AGCACCCAACTCCTGTTGTTGCCCCTGCTGAACCCATGTCCCTTTTTGAATATTATTATTTCATTTATGG +ATTTTGGAGAGGTGGCATGAGATTTAAACTTCAGGCAGTACGTACAAACTCAGCAGAAACATCAGTTAAA +ACCGACACAACTTGGACTGTAAATTTGTGGAATTCTGTACAAGATTCTTTTAATTCTCTAATTAATGTAT +TTAGTACTACTGATTACCCTATAAAATCCACAGGAGCACTTCCAGCCGGAACAAGCGGTTTTGGCAATTC +GATGACGTATATAGATCCTGAGGTTGAAGGTTTTATGGAATTTGAGATTCCATATTATAATATCTCCCAT +ATTTCTCCAGCTACAACCTATGTTCGTGGTACTGAATCTCCTATTACAATTAATAGTGTCTTGCGTGGAC +ATTTGCCACCACAAATTGTGGCTGTTGCACCACAGGGCACTATTGCCACTACAGATGTAGTGAACGCTCA +ATTTGCTCGTGCTCCTTCTGACGACTTTTCATTTATGTATCTCGTTGGTGTTCCACCACTTACCAACGTC +GCTCGTCCCTAACTCCCTTACTATTCTGGATCCTTTAAAATTTATTAGGATAGACAAAAATTAACTCTAT +ATTAGATAGTATTAGATTAAGTTTCTTTTTGGTTTTGGGTTTTATTCAGTAACTATCTGCCCTGCTTACA +CGGGTATTATTTTTAATTCTTGTCCCTTCTGGACTCTTTTATTTTGTATTTTCAAAATTTTTACTAATTT +TTAGTCAGAGTCCTTAGGGGCTACCAGGTTTTTCGCAATTTTCCTGCTTACTGACAGTAATTGCAATTTC +GAATTAAAATAATAGTTGTTTTCT +>gi|236164939|emb|GN351241.1|_Sequence_1005_from_Patent_WO2007130519 
+ATCATAAATGACAATTTTCTGTCCTTTATAACCATCCCAGAATTCAGTTTCAACTTGACG +>gi|236164937|emb|GN351240.1|_Sequence_1004_from_Patent_WO2007130519 +CGTCAAGTTGAAACTGAATTCTGGGATGGTTATAAAGGACAGAAAATTGTCATTTATGAT +>gi|236164934|emb|GN351239.1|_Sequence_1003_from_Patent_WO2007130519 +CCAATCCTGTTTACCAGGAATTTTCTTAATTGTTAAGAAAGCCATAACCGCAAAGATGAT +>gi|236164931|emb|GN351238.1|_Sequence_1002_from_Patent_WO2007130519 +ATCATCTTTGCGGTTATGGCTTTCTTAACAATTAAGAAAATTCCTGGTAAACAGGATTGG +>gi|236164929|emb|GN351237.1|_Sequence_1001_from_Patent_WO2007130519 +TGCAATTACTGTCAGTAAGCAGGAAAATTGCGAAAAACCTGGTAGCCCCTAAGGACTCTG +>gi|236164927|emb|GN351236.1|_Sequence_1000_from_Patent_WO2007130519 +CAGAGTCCTTAGGGGCTACCAGGTTTTTCGCAATTTTCCTGCTTACTGACAGTAATTGCA +>gi|236164925|emb|GN351235.1|_Sequence_999_from_Patent_WO2007130519 +AAATTTTGAAAATACAAAATAAAAGAGTCCAGAAGGGACAAGAATTAAAAATAATACCCG +>gi|236164923|emb|GN351234.1|_Sequence_998_from_Patent_WO2007130519 +CGGGTATTATTTTTAATTCTTGTCCCTTCTGGACTCTTTTATTTTGTATTTTCAAAATTT +>gi|236164921|emb|GN351233.1|_Sequence_997_from_Patent_WO2007130519 +TCCAGAATGAAATTGAGTCTTAACAAATTTAAAAGTATAAACTATTGATCCACACCAATA +>gi|236164917|emb|GN351232.1|_Sequence_996_from_Patent_WO2007130519 +TATTGGTGTGGATCAATAGTTTATACTTTTAAATTTGTTAAGACTCAATTTCATTCTGGA +>gi|236164915|emb|GN351231.1|_Sequence_995_from_Patent_WO2007130519 +GGCACCGCAGTGGTACTAGGGGTAACTCCTTCACTAGAAAAGTGTACAATCTCTTTTTGC +>gi|236164913|emb|GN351230.1|_Sequence_994_from_Patent_WO2007130519 +GCAAAAAGAGATTGTACACTTTTCTAGTGAAGGAGTTACCCCTAGTACCACTGCGGTGCC +>gi|236164910|emb|GN351229.1|_Sequence_993_from_Patent_WO2007130519 +CCTGGATATTATGGGGCTGCCAATAGGCATCCTGAACAGTAAAACTAAATAGTTAACCTA +>gi|236164908|emb|GN351228.1|_Sequence_992_from_Patent_WO2007130519 +TAGGTTAACTATTTAGTTTTACTGTTCAGGATGCCTATTGGCAGCCCCATAATATCCAGG +>gi|236164905|emb|GN351227.1|_Sequence_991_from_Patent_WO2007130519 +AGTTAACCTAATTATTAAGATAGCACTACTTCCTTCTTATTAACCTCTCAAAATTGTATA +>gi|236164901|emb|GN351226.1|_Sequence_990_from_Patent_WO2007130519 
+TATACAATTTTGAGAGGTTAATAAGAAGGAAGTAGTGCTATCTTAATAATTAGGTTAACT +>gi|236164898|emb|GN351225.1|_Sequence_989_from_Patent_WO2007130519 +ATAGCACACATAAATTCATCATCTTGGGTTCCACGAATAGCTTCCTCATATGTTAAAATG +>gi|236164896|emb|GN351224.1|_Sequence_988_from_Patent_WO2007130519 +CATTTTAACATATGAGGAAGCTATTCGTGGAACCCAAGATGATGAATTTATGTGTGCTAT +>gi|236164894|emb|GN351223.1|_Sequence_987_from_Patent_WO2007130519 +GTGTCAAAGGGCACGCATAACCATGCTCTTCTGCATCATTTCCAGCAATGTGCATACCGA +>gi|236164892|emb|GN351222.1|_Sequence_986_from_Patent_WO2007130519 +TCGGTATGCACATTGCTGGAAATGATGCAGAAGAGCATGGTTATGCGTGCCCTTTGACAC +>gi|92142564|dbj|BD294721.1|_Novel_tertiary_structure_having_ability_to_accelerate_translation_activ +GTTAAGATGTGATCTTGCTTCCTTATACAATTTTGAGAGGTTAATAAGAAGGAAGTAGTGCTATCTTAAT +AATTAGGTTAACTATTTAGTTTTACTGTTCAGGATGCCTATTGGCAGCCCCATAATATCCAGGACACCCT +CTCTGCTTCTTATATGATTAGGTTGTCATTTAGAATAAGAAAATAACCT +>gi|30014277|dbj|BD177017.1|_Novel_translational_activity-promoting_higher-order_structure +GTTAAGATGTGATCTTGCTTCCTTATACAATTTTGAGAGGTTAATAAGAAGGAAGTAGTGCTATCTTAAT +AATTAGGTTAACTATTTAGTTTTACTGTTCAGGATGCCTATTGGCAGCCCCATAATATCCAGGACACCCT +CTCTGCTTCTTATATGATTAGGTTGTCATTTAGAATAAGAAAATAACCT