Mercurial > repos > drosofff > fetch_fasta_from_ncbi
comparison retrieve_fasta_from_NCBI.py @ 1:c1d17d173128 draft
Uploaded
author | drosofff |
---|---|
date | Tue, 12 May 2015 17:42:29 -0400 |
parents | 4b34f2b5c14e |
children | e9df554f7725 |
comparison
equal
deleted
inserted
replaced
0:4b34f2b5c14e | 1:c1d17d173128 |
---|---|
148 req = urllib2.Request(url, data) | 148 req = urllib2.Request(url, data) |
149 self.logger.debug("data: %s" % str(data)) | 149 self.logger.debug("data: %s" % str(data)) |
150 req = urllib2.Request(url, data) | 150 req = urllib2.Request(url, data) |
151 response = urllib2.urlopen(req) | 151 response = urllib2.urlopen(req) |
152 fasta = response.read() | 152 fasta = response.read() |
| | 153 if "Resource temporarily unavailable" in fasta: |
| | 154 return '' # to reiterate the failed download |
153 if self.dbname != "pubmed": | 155 if self.dbname != "pubmed": |
154 assert fasta.startswith(">"), fasta | 156 assert fasta.startswith(">"), fasta |
155 fasta = self.sanitiser(self.dbname, fasta) # | 157 fasta = self.sanitiser(self.dbname, fasta) # |
156 time.sleep(1) | 158 time.sleep(1) |
157 return fasta | 159 return fasta |
210 with open(self.outname, 'w') as out: | 212 with open(self.outname, 'w') as out: |
211 for start in range(0, count, batch_size): | 213 for start in range(0, count, batch_size): |
212 end = min(count, start+batch_size) | 214 end = min(count, start+batch_size) |
213 batch = uids_list[start:end] | 215 batch = uids_list[start:end] |
214 self.epost(self.dbname, ",".join(batch)) | 216 self.epost(self.dbname, ",".join(batch)) |
215 self.logger.info("retrieving batch %d" % ((start / batch_size) + 1)) | 217 mfasta = '' |
216 mfasta = self.efetch(self.dbname, self.query_key, self.webenv) | 218 while not mfasta: |
| | 219 self.logger.info("retrieving batch %d" % ((start / batch_size) + 1)) |
| | 220 mfasta = self.efetch(self.dbname, self.query_key, self.webenv) |
217 out.write(mfasta + '\n') | 221 out.write(mfasta + '\n') |
218 | 222 |
219 | 223 |
220 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s' | 224 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s' |
221 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S' | 225 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S' |