Mercurial > repos > drosofff > fetch_fasta_from_ncbi
changeset 2:e9df554f7725 draft
Uploaded
author | drosofff |
---|---|
date | Wed, 20 May 2015 10:07:32 -0400 |
parents | c1d17d173128 |
children | c35b4867c884 |
files | retrieve_fasta_from_NCBI.py |
diffstat | 1 files changed, 23 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/retrieve_fasta_from_NCBI.py Tue May 12 17:42:29 2015 -0400
+++ b/retrieve_fasta_from_NCBI.py Wed May 20 10:07:32 2015 -0400
@@ -24,7 +24,7 @@
 python get_fasta_from_taxon.py -i 1638 -o test.out -d protein
 python get_fasta_from_taxon.py -i 327045 -o test.out -d nuccore # 556468 UIDs
 """
-
+import sys
 import logging
 import optparse
 import time
@@ -122,7 +122,19 @@
         req = urllib2.Request(url, data)
         #self.logger.debug("data: %s" % str(data))
         req = urllib2.Request(url, data)
-        response = urllib2.urlopen(req)
+        serverResponse = False
+        while not serverResponse:
+            try:
+                response = urllib2.urlopen(req)
+                serverResponse = True
+            except: # catch *all* exceptions
+                e = sys.exc_info()[0]
+                self.logger.info( "Catched Error: %s" % e )
+                self.logger.info( "Retrying in 10 sec")
+                time.sleep(10)
+# except urllib2.HTTPError as e:
+# serverResponse = False
+# self.logger.info("epost error:%s, %s" % (e.code, e.read() ) )
         querylog = response.readlines()
         self.logger.debug("query response:")
         for line in querylog:
@@ -148,7 +160,14 @@
         req = urllib2.Request(url, data)
         self.logger.debug("data: %s" % str(data))
         req = urllib2.Request(url, data)
-        response = urllib2.urlopen(req)
+        serverResponse = False
+        while not serverResponse:
+            try:
+                response = urllib2.urlopen(req)
+                serverResponse = True
+            except urllib2.HTTPError as e:
+                serverResponse = False
+                self.logger.info("urlopen error:%s, %s" % (e.code, e.read() ) )
         fasta = response.read()
         if "Resource temporarily unavailable" in fasta:
             return '' # to reiterate the failed download
@@ -191,7 +210,7 @@
                 fastalines[0] = re.sub(regex, "_", fastalines[0])
                 cleanseq = "\n".join(fastalines)
                 sane_seqlist.append(cleanseq)
-# sane_seqlist[-1] = sane_seqlist[-1] + "\n" # remove to have sequence blocks not separated by two \n
+        self.logger.info("clean sequences appended: %d" % (len(sane_seqlist) ) )
         return "\n".join(sane_seqlist)
 
     def get_sequences(self):
```