Mercurial > repos > proteore > proteore_data_manager
comparison data_manager/resource_building.py @ 51:55b12ec24a9f draft
"planemo upload commit 59b014e9f6e2d668cbd7c4844b10db3d59baefd8-dirty"
| author | proteore |
|---|---|
| date | Fri, 05 Jun 2020 13:24:48 +0000 |
| parents | 7a6d8aafb269 |
| children | bb552aa4b9ac |
comparison
equal
deleted
inserted
replaced
| 50:7a6d8aafb269 | 51:55b12ec24a9f |
|---|---|
| 130 return False | 130 return False |
| 131 | 131 |
| 132 ####################################################################################################### | 132 ####################################################################################################### |
| 133 # 3. ID mapping file | 133 # 3. ID mapping file |
| 134 ####################################################################################################### | 134 ####################################################################################################### |
| 135 import ftplib, gzip | 135 import ftplib, gzip |
| 136 from io import StringIO | |
| 136 csv.field_size_limit(sys.maxsize) # to handle big files | 137 csv.field_size_limit(sys.maxsize) # to handle big files |
| 137 | 138 |
| 138 def id_mapping_sources (data_manager_dict, species, target_directory, tool_data_path) : | 139 def id_mapping_sources (data_manager_dict, species, target_directory, tool_data_path) : |
| 139 | 140 |
| 140 human = species == "Human" | 141 human = species == "Human" |
| 278 ftp.retrbinary("RETR " + file, open(path, 'wb').write) | 279 ftp.retrbinary("RETR " + file, open(path, 'wb').write) |
| 279 ftp.quit() | 280 ftp.quit() |
| 280 return (path) | 281 return (path) |
| 281 | 282 |
| 282 def id_list_from_nextprot_ftp(file,target_directory) : | 283 def id_list_from_nextprot_ftp(file,target_directory) : |
| 283 ftp_dir = "pub/current_release/ac_lists/" | |
| 284 path = os.path.join(target_directory, file) | 284 path = os.path.join(target_directory, file) |
| 285 ftp = ftplib.FTP("ftp.nextprot.org") | 285 ftp = ftplib.FTP("ftp.nextprot.org") |
| 286 ftp.login("anonymous", "anonymous") | 286 ftp.login("anonymous", "anonymous") |
| 287 ftp.cwd(ftp_dir) | 287 ftp.cwd("pub/current_release/ac_lists/") |
| 288 ftp.retrbinary("RETR " + file, open(path, 'wb').write) | 288 r = StringIO() |
| 289 ftp.retrlines("RETR " + file, lambda line: r.write(line + '\n')) | |
| 289 ftp.quit() | 290 ftp.quit() |
| 290 | 291 r.seek(0) |
| 291 return (path) | 292 ids = r.readlines() |
| 293 ids = [id.strip('\n') for id in ids if id != ''] | |
| 294 return (ids) | |
| 292 | 295 |
| 293 #return '' if there's no value in a dictionary, avoid error | 296 #return '' if there's no value in a dictionary, avoid error |
| 294 def access_dictionary (dico,key1,key2) : | 297 def access_dictionary (dico,key1,key2) : |
| 295 if key1 in dico : | 298 if key1 in dico : |
| 296 if key2 in dico[key1] : | 299 if key2 in dico[key1] : |
| 547 | 550 |
| 548 nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]] | 551 nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]] |
| 549 writer.writerows(nextprot_file) | 552 writer.writerows(nextprot_file) |
| 550 | 553 |
| 551 for id in ids : | 554 for id in ids : |
| 552 #print (id) | |
| 553 query="https://api.nextprot.org/entry/"+id+".json" | 555 query="https://api.nextprot.org/entry/"+id+".json" |
| 554 resp = requests.get(url=query) | 556 resp = requests.get(url=query) |
| 555 data = resp.json() | 557 data = resp.json() |
| 556 | 558 |
| 557 #get info from json dictionary | 559 #get info from json dictionary |
