Mercurial > repos > jjohnson > iedb_api
comparison iedb_api.py @ 0:7a9ecf229480 draft default tip
Uploaded
| author | jjohnson |
|---|---|
| date | Mon, 28 Sep 2015 11:27:57 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:7a9ecf229480 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 """ | |
| 4 import sys | |
| 5 import os.path | |
| 6 import re | |
| 7 import optparse | |
| 8 import urllib | |
| 9 import urllib2 | |
| 10 from optparse import OptionParser | |
| 11 | |
# Method names known for each IEDB prediction service.  The keys of
# ``prediction_methods`` are the same service names accepted by the
# -p/--prediction command-line option.
mhci_methods = [
    'recommended', 'consensus', 'netmhcpan', 'ann', 'smmpmbec', 'smm',
    'comblib_sidney2008', 'netmhccons', 'pickpocket',
]
mhcii_methods = [
    'recommended', 'consensus3', 'NetMHCIIpan', 'nn_align', 'smm_align',
    'comblib', 'tepitope',
]
processing_methods = [
    'recommended', 'consensus', 'netmhcpan', 'ann', 'smmpmbec', 'smm',
    'comblib_sidney2008',
]
mhcnp_methods = ['mhcnp']
# 'Chou-Fasman' and 'Emini' are two distinct B-cell methods; they were
# previously fused into the single bogus entry 'Chou-FasmanEmini' by a
# missing comma.
bcell_methods = [
    'Bepipred', 'Chou-Fasman', 'Emini', 'Karplus-Schulz',
    'Kolaskar-Tongaonkar', 'Parker',
]
prediction_methods = {
    'mhci': mhci_methods,
    'mhcii': mhcii_methods,
    'processing': processing_methods,
    'mhcnp': mhcnp_methods,
    'bcell': bcell_methods,
}
| 18 | |
def warn_err(msg, exit_code=1):
    """Write *msg* to stderr, then exit with *exit_code* unless it is falsy.

    A falsy ``exit_code`` (``None`` or ``0``) only reports the message and
    returns, so the caller can keep going after a non-fatal warning.
    """
    sys.stderr.write(msg)
    if not exit_code:
        return
    sys.exit(exit_code)
| 23 | |
| 24 | |
def _tabular_records(lines, col, idcol=None):
    """Yield ``(line_number, seqid, seq)`` for each row that has column *col*.

    *lines* is an iterable of tab-separated text lines.  Line numbers are
    1-based.  ``_`` and ``*`` characters are removed from the peptide field.
    *seqid* is ``None`` when no usable ID column is available for the row.
    """
    for lineno, line in enumerate(lines, 1):
        # Strip the line terminator first so it cannot leak into the last
        # field (peptide or ID) and from there into the request / output.
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) <= col:
            continue
        seq = re.sub('[_*]', '', fields[col])
        seqid = fields[idcol] if idcol is not None and idcol < len(fields) else None
        yield lineno, seqid, seq


def _fasta_records(lines):
    """Yield ``(seqid, seq)`` for each FASTA entry with a non-empty id and sequence.

    Sequence lines that appear before the first ``>`` header are discarded.
    """
    seqid = None
    chunks = []
    for line in lines:
        if line.startswith('>'):
            seq = ''.join(chunks)
            if seqid and seq:
                yield seqid, seq
            seqid = line[1:].strip()
            chunks = []
        else:
            chunks.append(line.strip())
    seq = ''.join(chunks)
    if seqid and seq:
        yield seqid, seq


def _fetch_predictions(url, seq, allele, length, seqid=None, method='recommended', debug=False):
    """POST one sequence to *url* and return ``(header, rows)``.

    *header* is the last response line containing ``'eptide'`` (not at
    position 0), prefixed with ``#`` (and ``ID<TAB>`` when *seqid* is
    given), or ``None`` if no such line was returned.  *rows* are the
    remaining response lines, each prefixed with ``seqid<TAB>`` when
    *seqid* is given.  Connection failures and non-200 responses are
    reported through ``warn_err`` which terminates the program.
    """
    params = dict()
    if method:
        params['method'] = method
    params['sequence_text'] = seq
    params['allele'] = allele
    params['length'] = length
    request = urllib2.Request(url, urllib.urlencode(params))
    if debug:
        sys.stderr.write("url %s %s %s\n" % (request.get_full_url(), seqid if seqid else "None", seq))
    try:
        response = urllib2.urlopen(request)
    except urllib2.URLError as e:
        # HTTPError is a URLError subclass, so HTTP error statuses land here too.
        warn_err("Error connecting to IEDB server: %s\n" % e, exit_code=3)
        return None, []
    if response is None:
        warn_err("NO response from IEDB server\n", exit_code=3)
        return None, []
    if response.getcode() != 200:
        warn_err("Error connecting to IEDB server\n", exit_code=response.getcode())
        return None, []
    header = None
    rows = []
    try:
        for line in response.readlines():
            if line.find('eptide') > 0:
                header = "#%s%s" % ("ID\t" if seqid else "", line)
            elif seqid:
                rows.append("%s\t%s" % (seqid, line))
            else:
                rows.append(line)
    finally:
        response.close()
    return header, rows


def __main__():
    """Command-line entry point: query an IEDB tools-API prediction service.

    Sequences come from repeated ``-s`` options and/or an input file
    (``-i``), which is read as tab-separated text when ``-c`` is given and
    as FASTA otherwise.  Each sequence is sent in its own request; all
    result rows are collected and written, after the most recently seen
    header line, to the ``-o`` file or to stdout.
    """
    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option('-p', '--prediction', dest='prediction', default='mhci', choices=['mhci', 'mhcii', 'processing', 'mhcnp', 'bcell'], help='IEDB API prediction service')
    parser.add_option('-s', '--sequence', dest='sequence', action="append", default=None, help='Peptide Sequence')
    # NOTE(review): these choices match the mhci method list only, whatever
    # -p selects; the per-service lists are not consulted here -- confirm
    # whether that restriction is intended.
    parser.add_option('-m', '--method', dest='method', default='recommended', choices=['recommended', 'consensus', 'netmhcpan', 'ann', 'smmpmbec', 'smm', 'comblib_sidney2008', 'netmhccons', 'pickpocket'], help='prediction method')
    parser.add_option('-a', '--allele', dest='allele', action="append", default=[], help='Alleles for which to make predictions')
    parser.add_option('-l', '--length', dest='length', action="append", default=[], choices=['8', '9', '10', '11', '12', '13', '14', '15'], help='lengths for which to make predictions, 1 per allele')
    parser.add_option('-i', '--input', dest='input', default=None, help='Input file for peptide sequences (fasta or tabular)')
    parser.add_option('-c', '--column', dest='column', default=None, help='Peptide Column in a tabular input file')
    parser.add_option('-C', '--id_column', dest='id_column', default=None, help='ID Column in a tabular input file')
    parser.add_option('-o', '--output', dest='output', default=None, help='Output file for query results')
    parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
    (options, args) = parser.parse_args()

    aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$'

    if not options.allele and options.prediction != 'bcell':
        warn_err('-a allele required\n', exit_code=1)

    if not (options.sequence or options.input):
        warn_err('NO Sequences given: either -s sequence or -i input_file is required\n', exit_code=1)

    # Validate the column numbers before any file is opened or request sent,
    # so a bad value is reported as such and does not truncate the output.
    col = None
    idcol = None
    if options.input and options.column:
        try:
            col = int(options.column)
            idcol = int(options.id_column) if options.id_column else None
        except ValueError as e:
            warn_err("Column options must be integers: %s\n" % e, exit_code=1)

    if options.output is not None:
        try:
            outputFile = open(os.path.abspath(options.output), 'w')
        except (IOError, OSError) as e:
            warn_err("Unable to open output file: %s\n" % e, exit_code=1)
    else:
        outputFile = sys.stdout

    url = 'http://tools-api.iedb.org/tools_api/%s/' % options.prediction

    # TODO parse alleles from the options.alleles file
    alleles = ','.join(options.allele)
    lengths = ','.join(options.length)
    method = options.method

    results = []
    # Mutable holder so the nested function can update the header without
    # relying on a module-level global.
    state = {'header': None}

    def query(seq, seqid=None):
        header, rows = _fetch_predictions(url, seq, alleles, lengths, seqid=seqid, method=method, debug=options.debug)
        if header is not None:
            state['header'] = header
        results.extend(rows)

    try:
        for seq in options.sequence or []:
            query(seq)

        if options.input:
            try:
                fh = open(options.input, 'r')
            except (IOError, OSError) as e:
                warn_err("Unable to open input file: %s\n" % e, exit_code=1)
            else:
                with fh:
                    if col is not None:  # tabular
                        for lineno, seqid, seq in _tabular_records(fh, col, idcol):
                            if re.match(aapat, seq):
                                query(seq, seqid=seqid)
                            else:
                                warn_err('Line %d, Not a peptide: %s\n' % (lineno, seq), exit_code=None)
                    else:  # fasta
                        for seqid, seq in _fasta_records(fh):
                            query(seq, seqid=seqid)

        if state['header']:
            outputFile.write(state['header'])
        for line in results:
            outputFile.write(line)
    finally:
        # Flush and release the output file, but never close stdout.
        if outputFile is not sys.stdout:
            outputFile.close()
# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    __main__()
| 142 |
