Mercurial > repos > galaxyp > retrieve_ensembl_bed
comparison ensembl_rest.py @ 0:887e111c0919 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
| author | galaxyp |
|---|---|
| date | Sun, 14 Jan 2018 14:11:53 -0500 |
| parents | |
| children | c3d600729b6f |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:887e111c0919 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 # | |
| 4 #------------------------------------------------------------------------------ | |
| 5 # University of Minnesota | |
| 6 # Copyright 2017, Regents of the University of Minnesota | |
| 7 #------------------------------------------------------------------------------ | |
| 8 # Author: | |
| 9 # | |
| 10 # James E Johnson | |
| 11 # | |
| 12 #------------------------------------------------------------------------------ | |
| 13 """ | |
| 14 | |
| 15 | |
| 16 import sys | |
| 17 | |
| 18 from time import sleep | |
| 19 | |
| 20 import requests | |
| 21 | |
| 22 | |
# Base URL of the Ensembl REST service; endpoint paths are appended to it.
server = "https://rest.ensembl.org"
# Module-level endpoint path. The functions visible here each build their
# own local ``ext`` value rather than reading this one.
ext = "/info/assembly/homo_sapiens?"
# Step size used by get_transcripts_bed when splitting a range into
# consecutive sub-regions, one request per sub-region.
max_region = 4000000
# When true, ensembl_rest writes each requested path to stderr.
debug = False
| 27 | |
| 28 | |
def ensembl_rest(ext, headers):
    """Perform a GET request against the Ensembl REST server.

    If the server answers with HTTP 429 and supplies a ``Retry-After``
    header, wait for the indicated number of seconds and retry once.

    Args:
        ext: Endpoint path (and query string) appended to the module-level
            ``server`` URL.
        headers: Dict of HTTP request headers passed to ``requests.get``.

    Returns:
        The ``requests`` response object of the last request made.

    Raises:
        requests.exceptions.HTTPError: If the final response carries an
            error status (raised by ``raise_for_status``).
    """
    if debug:
        sys.stderr.write("%s\n" % ext)
    url = server + ext
    r = requests.get(url, headers=headers)
    if r.status_code == 429:
        # Rate limited: report the headers, then honour Retry-After once.
        sys.stderr.write("response headers: %s\n\n" % r.headers)
        if 'Retry-After' in r.headers:
            # Header values are strings; sleep() requires a number.
            try:
                delay = float(r.headers['Retry-After'])
            except (TypeError, ValueError):
                # Non-numeric value: fall back to a short fixed pause
                # instead of crashing the best-effort retry.
                delay = 1.0
            # sleep() rejects negative values.
            sleep(max(delay, 0.0))
            r = requests.get(url, headers=headers)
    # raise_for_status() is a no-op for successful responses.
    r.raise_for_status()
    return r
| 41 | |
| 42 | |
def get_species():
    """Fetch the species list from ``/info/species``.

    As a side effect, one tab-separated line per species (name, common
    name, display name, strain, taxon id) is written to stdout.

    Returns:
        Dict mapping each species ``name`` to its full record as returned
        by the service.

    Raises:
        KeyError: If a species record lacks one of the printed fields.
    """
    results = dict()
    ext = "/info/species"
    req_header = {"Content-Type": "application/json"}
    r = ensembl_rest(ext, req_header)
    for species in r.json()['species']:
        results[species['name']] = species
        # sys.stdout.write works on both Python 2 and 3, unlike the
        # Python-2-only ``print >>`` statement.
        sys.stdout.write(
            "%s\t%s\t%s\t%s\t%s\n"
            % (species['name'], species['common_name'],
               species['display_name'],
               species['strain'],
               species['taxon_id']))
    return results
| 57 | |
| 58 | |
def get_biotypes(species):
    """Return the list of biotype names reported for *species*.

    Queries ``/info/biotypes/<species>`` and collects the ``biotype``
    value of every returned entry that has one; entries without that key
    are skipped.
    """
    response = ensembl_rest("/info/biotypes/%s?" % species,
                            {"Content-Type": "application/json"})
    return [item['biotype'] for item in response.json()
            if 'biotype' in item]
| 68 | |
| 69 | |
def get_toplevel(species):
    """Return the top-level sequence lengths of the assembly for *species*.

    Queries ``/info/assembly/<species>`` and groups the entries of
    ``top_level_region`` by their ``coord_system``.

    Returns:
        Dict mapping coord-system name to a dict of
        ``{sequence name: length as int}``.
    """
    response = ensembl_rest("/info/assembly/%s?" % species,
                            {"Content-Type": "application/json"})
    by_system = {}
    for region in response.json()['top_level_region']:
        # setdefault creates the per-system dict on first sight.
        lengths = by_system.setdefault(region['coord_system'], {})
        lengths[region['name']] = int(region['length'])
    return by_system
| 82 | |
| 83 | |
def get_transcripts_bed(species, refseq, start, length, strand='',
                        params=None):
    """Fetch transcript features overlapping a region as BED lines.

    The span from *start* to *length* is split into consecutive
    sub-regions of at most ``max_region`` positions (plus the shared
    boundary), and one ``/overlap/region`` request is made per sub-region.

    Args:
        species: Species name placed in the request path.
        refseq: Sequence (region) name placed in the request path.
        start: First position to request.
        length: Last position to request (used as the final region end).
        strand: Text appended verbatim after the region end in the path
            (presumably a ``:<strand>`` suffix; confirm against callers).
        params: Extra query-string text appended after
            ``feature=transcript;``; ``None`` or empty adds nothing.

    Returns:
        List of response text lines from all sub-region requests, in
        request order. Empty when *start* is not below *length*.
    """
    bed = []
    param = params if params else ''
    req_header = {"Content-Type": "text/x-bed"}
    # Materialise the range: on Python 3 a range object has no append().
    regions = list(range(start, length, max_region))
    if not regions or regions[-1] < length:
        regions.append(length)
    # regions[0] is the initial start; the remaining entries are the end
    # of each successive sub-region.
    for end in regions[1:]:
        ext = "/overlap/region/%s/%s:%d-%d%s?feature=transcript;%s"\
            % (species, refseq, start, end, strand, param)
        # The next sub-region begins right after this one ends.
        start = end + 1
        r = ensembl_rest(ext, req_header)
        if r.text:
            bed.extend(r.text.splitlines())
    return bed
| 100 | |
| 101 | |
def get_seq(id, seqtype, params=None):
    """Return the plain-text sequence of type *seqtype* for identifier *id*.

    *params*, when given and non-empty, is appended verbatim to the query
    string after ``type=<seqtype>;``.
    """
    extra = params or ''
    path = "/sequence/id/%s?type=%s;%s" % (id, seqtype, extra)
    return ensembl_rest(path, {"Content-Type": "text/plain"}).text
| 108 | |
| 109 | |
def get_cdna(id, params=None):
    """Return the ``cdna`` sequence text for *id* via ``get_seq``."""
    seqtype = 'cdna'
    return get_seq(id, seqtype, params=params)
| 112 | |
| 113 | |
def get_cds(id, params=None):
    """Return the ``cds`` sequence text for *id* via ``get_seq``."""
    seqtype = 'cds'
    return get_seq(id, seqtype, params=params)
| 116 | |
| 117 | |
def get_genomic(id, params=None):
    """Return the ``genomic`` sequence text for *id* via ``get_seq``."""
    seqtype = 'genomic'
    return get_seq(id, seqtype, params=params)
| 120 | |
| 121 | |
def get_transcript_haplotypes(species, transcript):
    """Return the decoded JSON of the transcript-haplotypes endpoint.

    Requests ``/transcript_haplotypes/<species>/<transcript>`` with
    ``aligned_sequences=1`` and returns the parsed response body.
    """
    path = "/transcript_haplotypes/%s/%s?aligned_sequences=1" % (
        species, transcript)
    response = ensembl_rest(path, {"Content-Type": "application/json"})
    return response.json()
