comparison retrieve_fasta_from_NCBI.py @ 1:c1d17d173128 draft

Uploaded
author drosofff
date Tue, 12 May 2015 17:42:29 -0400
parents 4b34f2b5c14e
children e9df554f7725
comparison
equal deleted inserted replaced
0:4b34f2b5c14e 1:c1d17d173128
148 req = urllib2.Request(url, data) 148 req = urllib2.Request(url, data)
149 self.logger.debug("data: %s" % str(data)) 149 self.logger.debug("data: %s" % str(data))
150 req = urllib2.Request(url, data) 150 req = urllib2.Request(url, data)
151 response = urllib2.urlopen(req) 151 response = urllib2.urlopen(req)
152 fasta = response.read() 152 fasta = response.read()
153 if "Resource temporarily unavailable" in fasta:
154 return '' # to reiterate the failed download
153 if self.dbname != "pubmed": 155 if self.dbname != "pubmed":
154 assert fasta.startswith(">"), fasta 156 assert fasta.startswith(">"), fasta
155 fasta = self.sanitiser(self.dbname, fasta) # 157 fasta = self.sanitiser(self.dbname, fasta) #
156 time.sleep(1) 158 time.sleep(1)
157 return fasta 159 return fasta
210 with open(self.outname, 'w') as out: 212 with open(self.outname, 'w') as out:
211 for start in range(0, count, batch_size): 213 for start in range(0, count, batch_size):
212 end = min(count, start+batch_size) 214 end = min(count, start+batch_size)
213 batch = uids_list[start:end] 215 batch = uids_list[start:end]
214 self.epost(self.dbname, ",".join(batch)) 216 self.epost(self.dbname, ",".join(batch))
215 self.logger.info("retrieving batch %d" % ((start / batch_size) + 1)) 217 mfasta = ''
216 mfasta = self.efetch(self.dbname, self.query_key, self.webenv) 218 while not mfasta:
219 self.logger.info("retrieving batch %d" % ((start / batch_size) + 1))
220 mfasta = self.efetch(self.dbname, self.query_key, self.webenv)
217 out.write(mfasta + '\n') 221 out.write(mfasta + '\n')
218 222
219 223
220 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s' 224 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
221 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S' 225 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'