changeset 2:e9df554f7725 draft

Uploaded
author drosofff
date Wed, 20 May 2015 10:07:32 -0400
parents c1d17d173128
children c35b4867c884
files retrieve_fasta_from_NCBI.py
diffstat 1 files changed, 23 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/retrieve_fasta_from_NCBI.py	Tue May 12 17:42:29 2015 -0400
+++ b/retrieve_fasta_from_NCBI.py	Wed May 20 10:07:32 2015 -0400
@@ -24,7 +24,7 @@
 python get_fasta_from_taxon.py -i 1638 -o test.out -d protein
 python get_fasta_from_taxon.py -i 327045 -o test.out -d nuccore # 556468 UIDs
 """
-
+import sys
 import logging
 import optparse
 import time
@@ -122,7 +122,19 @@
         req = urllib2.Request(url, data)
         #self.logger.debug("data: %s" % str(data))
         req = urllib2.Request(url, data)
-        response = urllib2.urlopen(req)
+        serverResponse = False
+        while not serverResponse:
+            try:
+                response = urllib2.urlopen(req)
+                serverResponse = True
+            except: # catch *all* exceptions
+                e = sys.exc_info()[0]
+                self.logger.info( "Catched Error: %s" % e )
+                self.logger.info( "Retrying in 10 sec")
+                time.sleep(10)
+#            except urllib2.HTTPError as e:
+#                serverResponse = False
+#                self.logger.info("epost error:%s, %s" % (e.code, e.read() ) )
         querylog = response.readlines()
         self.logger.debug("query response:")
         for line in querylog:
@@ -148,7 +160,14 @@
         req = urllib2.Request(url, data)
         self.logger.debug("data: %s" % str(data))
         req = urllib2.Request(url, data)
-        response = urllib2.urlopen(req)
+        serverResponse = False
+        while not serverResponse:
+            try:
+                response = urllib2.urlopen(req)
+                serverResponse = True
+            except urllib2.HTTPError as e:
+                serverResponse = False
+                self.logger.info("urlopen error:%s, %s" % (e.code, e.read() ) )
         fasta = response.read()
         if "Resource temporarily unavailable" in fasta:
             return '' # to reiterate the failed download
@@ -191,7 +210,7 @@
 				fastalines[0] = re.sub(regex, "_", fastalines[0])
 				cleanseq = "\n".join(fastalines)
 				sane_seqlist.append(cleanseq)
-#		sane_seqlist[-1] = sane_seqlist[-1] + "\n" # remove to have sequence blocks not separated by two \n
+		self.logger.info("clean sequences appended: %d" % (len(sane_seqlist) ) )
 		return "\n".join(sane_seqlist)
 
     def get_sequences(self):