Mercurial > repos > drosofff > fetch_fasta_from_ncbi
comparison retrieve_fasta_from_NCBI.py @ 1:c1d17d173128 draft
Uploaded
| author | drosofff |
|---|---|
| date | Tue, 12 May 2015 17:42:29 -0400 |
| parents | 4b34f2b5c14e |
| children | e9df554f7725 |
comparison
equal
deleted
inserted
replaced
| 0:4b34f2b5c14e | 1:c1d17d173128 |
|---|---|
| 148 req = urllib2.Request(url, data) | 148 req = urllib2.Request(url, data) |
| 149 self.logger.debug("data: %s" % str(data)) | 149 self.logger.debug("data: %s" % str(data)) |
| 150 req = urllib2.Request(url, data) | 150 req = urllib2.Request(url, data) |
| 151 response = urllib2.urlopen(req) | 151 response = urllib2.urlopen(req) |
| 152 fasta = response.read() | 152 fasta = response.read() |
| | 153 if "Resource temporarily unavailable" in fasta: |
| | 154 return '' # to reiterate the failed download |
| 153 if self.dbname != "pubmed": | 155 if self.dbname != "pubmed": |
| 154 assert fasta.startswith(">"), fasta | 156 assert fasta.startswith(">"), fasta |
| 155 fasta = self.sanitiser(self.dbname, fasta) # | 157 fasta = self.sanitiser(self.dbname, fasta) # |
| 156 time.sleep(1) | 158 time.sleep(1) |
| 157 return fasta | 159 return fasta |
| 210 with open(self.outname, 'w') as out: | 212 with open(self.outname, 'w') as out: |
| 211 for start in range(0, count, batch_size): | 213 for start in range(0, count, batch_size): |
| 212 end = min(count, start+batch_size) | 214 end = min(count, start+batch_size) |
| 213 batch = uids_list[start:end] | 215 batch = uids_list[start:end] |
| 214 self.epost(self.dbname, ",".join(batch)) | 216 self.epost(self.dbname, ",".join(batch)) |
| 215 self.logger.info("retrieving batch %d" % ((start / batch_size) + 1)) | 217 mfasta = '' |
| 216 mfasta = self.efetch(self.dbname, self.query_key, self.webenv) | 218 while not mfasta: |
| | 219 self.logger.info("retrieving batch %d" % ((start / batch_size) + 1)) |
| | 220 mfasta = self.efetch(self.dbname, self.query_key, self.webenv) |
| 217 out.write(mfasta + '\n') | 221 out.write(mfasta + '\n') |
| 218 | 222 |
| 219 | 223 |
| 220 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s' | 224 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s' |
| 221 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S' | 225 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S' |
