Mercurial > repos > drosofff > fetch_fasta_from_ncbi
comparison retrieve_fasta_from_NCBI.py @ 14:54941746784b draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/fetch_fasta_from_ncbi commit 11ca680184135ef39a6c552d9f3cc427a8ed6c4c
| author | drosofff |
|---|---|
| date | Fri, 16 Jun 2017 05:28:45 -0400 |
| parents | 639daa4c3c1a |
| children |
comparison
equal
deleted
inserted
replaced
| 13:639daa4c3c1a | 14:54941746784b |
|---|---|
| 28 import time | 28 import time |
| 29 import urllib | 29 import urllib |
| 30 import urllib2 | 30 import urllib2 |
| 31 import httplib | 31 import httplib |
| 32 import re | 32 import re |
| | 33 |
| | 34 |
| | 35 class QueryException(Exception): |
| | 36 pass |
| 33 | 37 |
| 34 | 38 |
| 35 class Eutils: | 39 class Eutils: |
| 36 | 40 |
| 37 def __init__(self, options, logger): | 41 def __init__(self, options, logger): |
| 60 self.get_count_value() | 64 self.get_count_value() |
| 61 | 65 |
| 62 # If no UIDs are found exit script | 66 # If no UIDs are found exit script |
| 63 if self.count > 0: | 67 if self.count > 0: |
| 64 self.get_uids_list() | 68 self.get_uids_list() |
| 65 self.get_sequences() | 69 try: |
| | 70 self.get_sequences() |
| | 71 except QueryException as e: |
| | 72 self.logger.error("Exiting script.") |
| | 73 raise e |
| 66 else: | 74 else: |
| 67 self.logger.info("No UIDs were found. Exiting script.") | 75 self.logger.error("No UIDs were found. Exiting script.") |
| | 76 raise Exception("") |
| 68 | 77 |
| 69 def get_count_value(self): | 78 def get_count_value(self): |
| 70 """ | 79 """ |
| 71 just to retrieve Count (number of UIDs) | 80 just to retrieve Count (number of UIDs) |
| 72 Total number of UIDs from the retrieved set to be shown in the XML | 81 Total number of UIDs from the retrieved set to be shown in the XML |
| 193 fasta = response.read() | 202 fasta = response.read() |
| 194 response.close() | 203 response.close() |
| 195 if ( (response_code != 200) or ("Resource temporarily unavailable" in fasta) | 204 if ( (response_code != 200) or ("Resource temporarily unavailable" in fasta) |
| 196 or ("Error" in fasta) or (not fasta.startswith(">") ) ): | 205 or ("Error" in fasta) or (not fasta.startswith(">") ) ): |
| 197 serverTransaction = False | 206 serverTransaction = False |
| | 207 if ( response_code != 200 ): |
| | 208 self.logger.info("urlopen error: Response code is not 200") |
| | 209 elif ( "Resource temporarily unavailable" in fasta ): |
| | 210 self.logger.info("Ressource temporarily unavailable") |
| | 211 elif ( "Error" in fasta ): |
| | 212 self.logger.info("Error in fasta") |
| | 213 else: |
| | 214 self.logger.info("Fasta doesn't start with '>'") |
| 198 else: | 215 else: |
| 199 serverTransaction = True | 216 serverTransaction = True |
| 200 except urllib2.HTTPError as e: | 217 except urllib2.HTTPError as e: |
| 201 serverTransaction = False | 218 serverTransaction = False |
| 202 self.logger.info("urlopen error:%s, %s" % (e.code, e.read() ) ) | 219 self.logger.info("urlopen error:%s, %s" % (e.code, e.read() ) ) |
| 205 self.logger.info("urlopen error: Failed to reach a server") | 222 self.logger.info("urlopen error: Failed to reach a server") |
| 206 self.logger.info("Reason :%s" % ( e.reason ) ) | 223 self.logger.info("Reason :%s" % ( e.reason ) ) |
| 207 except httplib.IncompleteRead as e: | 224 except httplib.IncompleteRead as e: |
| 208 serverTransaction = False | 225 serverTransaction = False |
| 209 self.logger.info("IncompleteRead error: %s" % ( e.partial ) ) | 226 self.logger.info("IncompleteRead error: %s" % ( e.partial ) ) |
| | 227 if (counter > 500): |
| | 228 serverTransaction = True |
| | 229 if (counter > 500): |
| | 230 raise QueryException({"message":"500 Server Transaction Trials attempted for this batch. Aborting."}) |
| 210 fasta = self.sanitiser(self.dbname, fasta) | 231 fasta = self.sanitiser(self.dbname, fasta) |
| 211 time.sleep(0.1) | 232 time.sleep(0.1) |
| 212 return fasta | 233 return fasta |
| 213 | 234 |
| 214 def sanitiser(self, db, fastaseq): | 235 def sanitiser(self, db, fastaseq): |
| 268 batch = uids_list[start:end] | 289 batch = uids_list[start:end] |
| 269 if self.epost(self.dbname, ",".join(batch)) != -1: | 290 if self.epost(self.dbname, ",".join(batch)) != -1: |
| 270 mfasta = '' | 291 mfasta = '' |
| 271 while not mfasta: | 292 while not mfasta: |
| 272 self.logger.info("retrieving batch %d" % ((start / batch_size) + 1)) | 293 self.logger.info("retrieving batch %d" % ((start / batch_size) + 1)) |
| 273 mfasta = self.efetch(self.dbname, self.query_key, self.webenv) | 294 try: |
| 274 out.write(mfasta + '\n') | 295 mfasta = self.efetch(self.dbname, self.query_key, self.webenv) |
| | 296 out.write(mfasta + '\n') |
| | 297 except QueryException as e: |
| | 298 self.logger.error("%s" % e.message) |
| | 299 raise e |
| 275 | 300 |
| 276 | 301 |
| 277 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s' | 302 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s' |
| 278 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S' | 303 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S' |
| 279 LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] | 304 LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] |
| 299 kwargs['filename'] = options.logfile | 324 kwargs['filename'] = options.logfile |
| 300 logging.basicConfig(**kwargs) | 325 logging.basicConfig(**kwargs) |
| 301 logger = logging.getLogger('data_from_NCBI') | 326 logger = logging.getLogger('data_from_NCBI') |
| 302 | 327 |
| 303 E = Eutils(options, logger) | 328 E = Eutils(options, logger) |
| 304 E.retrieve() | 329 try: |
| | 330 E.retrieve() |
| | 331 except Exception as e: |
| | 332 sys.exit(1) |
| 305 | 333 |
| 306 | 334 |
| 307 if __name__ == "__main__": | 335 if __name__ == "__main__": |
| 308 __main__() | 336 __main__() |
