proteore_data_manager: comparison of data_manager/resource_building.py @ 54:109fc5236204 (draft)
"planemo upload commit ebdd6549d01d60be6f07abca06f6ce4e2b6beda1"
| field | value |
|---|---|
| author | proteore |
| date | Mon, 08 Jun 2020 14:36:25 +0000 |
| parents | bb552aa4b9ac |
| children | 9fda95925297 |

The table below shows the changed hunks, with the parent revision on the left and this revision on the right:

| 53:bb552aa4b9ac (parent) | 54:109fc5236204 (this revision) |
|---|---|
| 233 #print ("tab ok") | 233 #print ("tab ok") |
| 234 | 234 |
| 235 #add missing nextprot ID for human or replace old ones | 235 #add missing nextprot ID for human or replace old ones |
| 236 if human : | 236 if human : |
| 237 #build next_dict | 237 #build next_dict |
| 238 nextprot_path = id_list_from_nextprot_ftp("nextprot_ac_list_all.txt",target_directory) | 238 nextprot_path = download_from_nextprot_ftp("nextprot_ac_list_all.txt",target_directory) |
| 239 with open(nextprot_path,'r') as nextprot_ids : | 239 with open(nextprot_path,'r') as nextprot_ids : |
| 240 nextprot_ids = nextprot_ids.read().splitlines() | 240 nextprot_ids = nextprot_ids.read().splitlines() |
| 241 if os.path.exists(os.path.join(archive,nextprot_path.split("/")[-1])) : os.remove(os.path.join(archive,nextprot_path.split("/")[-1])) | 241 if os.path.exists(os.path.join(archive,nextprot_path.split("/")[-1])) : os.remove(os.path.join(archive,nextprot_path.split("/")[-1])) |
| 242 shutil.move(nextprot_path,archive) | 242 shutil.move(nextprot_path,archive) |
| 243 next_dict = {} | 243 next_dict = {} |
| 272 | 272 |
| 273 def download_from_uniprot_ftp(file,target_directory) : | 273 def download_from_uniprot_ftp(file,target_directory) : |
| 274 ftp_dir = "pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/" | 274 ftp_dir = "pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/" |
| 275 path = os.path.join(target_directory, file) | 275 path = os.path.join(target_directory, file) |
| 276 ftp = ftplib.FTP("ftp.uniprot.org") | 276 ftp = ftplib.FTP("ftp.uniprot.org") |
| | 277 ftp.login("anonymous", "anonymous") |
| | 278 ftp.cwd(ftp_dir) |
| | 279 ftp.retrbinary("RETR " + file, open(path, 'wb').write) |
| | 280 ftp.quit() |
| | 281 return (path) |
| | 282 |
| | 283 def download_from_nextprot_ftp(file,target_directory) : |
| | 284 ftp_dir = "pub/current_release/ac_lists/" |
| | 285 path = os.path.join(target_directory, file) |
| | 286 ftp = ftplib.FTP("ftp.nextprot.org") |
| 277 ftp.login("anonymous", "anonymous") | 287 ftp.login("anonymous", "anonymous") |
| 278 ftp.cwd(ftp_dir) | 288 ftp.cwd(ftp_dir) |
| 279 ftp.retrbinary("RETR " + file, open(path, 'wb').write) | 289 ftp.retrbinary("RETR " + file, open(path, 'wb').write) |
| 280 ftp.quit() | 290 ftp.quit() |
| 281 return (path) | 291 return (path) |
| 550 nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]] | 560 nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]] |
| 551 writer.writerows(nextprot_file) | 561 writer.writerows(nextprot_file) |
| 552 | 562 |
| 553 for id in ids : | 563 for id in ids : |
| 554 query="https://api.nextprot.org/entry/"+id+".json" | 564 query="https://api.nextprot.org/entry/"+id+".json" |
| | 565 try: |
| | 566 resp = requests.get(url=query) |
| | 567 except : |
| | 568 print ("waiting 10 minutes before trying again") |
| | 569 time.sleep(600) |
| | 570 resp = requests.get(url=query) |
| | 571 data = resp.json() |
| 555 resp = requests.get(url=query) | 572 resp = requests.get(url=query) |
| 556 data = resp.json() | 573 data = resp.json() |
| 557 | 574 |
| 558 #get info from json dictionary | 575 #get info from json dictionary |
| 559 mass_mol = data["entry"]["isoforms"][0]["massAsString"] | 576 mass_mol = data["entry"]["isoforms"][0]["massAsString"] |
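
One hunk above adds download_from_nextprot_ftp alongside the existing download_from_uniprot_ftp; the two helpers differ only in host and remote directory. The snippet below is a minimal sketch of that shared pattern, not the committed code: the helper name fetch_from_ftp and the context managers are assumptions added here (the revision itself opens the connection and file handle directly and calls ftp.quit()).

```python
# Minimal sketch of the anonymous-FTP download pattern shared by
# download_from_uniprot_ftp and the new download_from_nextprot_ftp.
# fetch_from_ftp is a hypothetical name; the context managers are an
# addition so the connection and file handle are always closed.
import ftplib
import os

def fetch_from_ftp(host, ftp_dir, filename, target_directory):
    """Download `filename` from `ftp_dir` on `host` into `target_directory`."""
    path = os.path.join(target_directory, filename)
    with ftplib.FTP(host) as ftp, open(path, "wb") as out:
        ftp.login("anonymous", "anonymous")   # anonymous login, as in the diff
        ftp.cwd(ftp_dir)                      # move to the release directory
        ftp.retrbinary("RETR " + filename, out.write)
    return path

# Mirrors the download_from_nextprot_ftp call used to build next_dict above:
# fetch_from_ftp("ftp.nextprot.org", "pub/current_release/ac_lists/",
#                "nextprot_ac_list_all.txt", target_directory)
```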
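
Another hunk wraps the per-entry neXtProt API request (https://api.nextprot.org/entry/<id>.json) in a try/except that waits ten minutes and retries once before parsing the JSON. A minimal sketch of that retry follows, with a hypothetical helper name and the bare except narrowed to requests.RequestException. Note that in the comparison above the pre-existing unguarded requests.get and resp.json() calls (new lines 572-573) still follow the inserted block, so each entry appears to be requested twice; the sketch issues only the guarded request.

```python
# Minimal sketch of the retry added around the neXtProt API call
# (new lines 565-571 above). get_nextprot_entry is a hypothetical name;
# the diff itself uses a bare `except` and a fixed 600-second pause.
import time
import requests

def get_nextprot_entry(nextprot_id, wait_seconds=600):
    """Fetch one neXtProt entry as JSON, retrying once after a long pause."""
    query = "https://api.nextprot.org/entry/" + nextprot_id + ".json"
    try:
        resp = requests.get(url=query)
    except requests.RequestException:
        print("waiting " + str(wait_seconds // 60) + " minutes before trying again")
        time.sleep(wait_seconds)
        resp = requests.get(url=query)
    return resp.json()

# e.g. (hypothetical ID)
# data = get_nextprot_entry("NX_P01308")
# mass_mol = data["entry"]["isoforms"][0]["massAsString"]
```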
