comparison data_manager/resource_building.py @ 54:109fc5236204 draft

"planemo upload commit ebdd6549d01d60be6f07abca06f6ce4e2b6beda1"
author proteore
date Mon, 08 Jun 2020 14:36:25 +0000
parents bb552aa4b9ac
children 9fda95925297
comparison
equal deleted inserted replaced
53:bb552aa4b9ac 54:109fc5236204
233 #print ("tab ok") 233 #print ("tab ok")
234 234
235 #add missing nextprot ID for human or replace old ones 235 #add missing nextprot ID for human or replace old ones
236 if human : 236 if human :
237 #build next_dict 237 #build next_dict
238 nextprot_path = id_list_from_nextprot_ftp("nextprot_ac_list_all.txt",target_directory) 238 nextprot_path = download_from_nextprot_ftp("nextprot_ac_list_all.txt",target_directory)
239 with open(nextprot_path,'r') as nextprot_ids : 239 with open(nextprot_path,'r') as nextprot_ids :
240 nextprot_ids = nextprot_ids.read().splitlines() 240 nextprot_ids = nextprot_ids.read().splitlines()
241 if os.path.exists(os.path.join(archive,nextprot_path.split("/")[-1])) : os.remove(os.path.join(archive,nextprot_path.split("/")[-1])) 241 if os.path.exists(os.path.join(archive,nextprot_path.split("/")[-1])) : os.remove(os.path.join(archive,nextprot_path.split("/")[-1]))
242 shutil.move(nextprot_path,archive) 242 shutil.move(nextprot_path,archive)
243 next_dict = {} 243 next_dict = {}
272 272
def download_from_uniprot_ftp(file,target_directory) :
    """Download one id-mapping file from the UniProt FTP server.

    Connects anonymously to ftp.uniprot.org, changes to the
    current-release ``idmapping/by_organism`` directory and retrieves
    ``file`` in binary mode.

    Args:
        file: name of the remote file; also used as the local file name.
        target_directory: local directory the file is written into.

    Returns:
        The local path of the downloaded file
        (``os.path.join(target_directory, file)``).

    Raises:
        ftplib.all_errors: if connecting, logging in, changing directory
            or the transfer fails.
    """
    ftp_dir = "pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/"
    path = os.path.join(target_directory, file)
    ftp = ftplib.FTP("ftp.uniprot.org")
    try:
        ftp.login("anonymous", "anonymous")
        ftp.cwd(ftp_dir)
        # Explicitly close the local file so the data is flushed to disk
        # before the caller opens or moves it.
        with open(path, 'wb') as local_file:
            ftp.retrbinary("RETR " + file, local_file.write)
    finally:
        # Always release the connection, even when the transfer failed.
        try:
            ftp.quit()
        except ftplib.all_errors:
            ftp.close()
    return (path)
282
def download_from_nextprot_ftp(file,target_directory) :
    """Download one accession-list file from the neXtProt FTP server.

    Connects anonymously to ftp.nextprot.org, changes to
    ``pub/current_release/ac_lists/`` and retrieves ``file`` in binary
    mode.

    Args:
        file: name of the remote file; also used as the local file name.
        target_directory: local directory the file is written into.

    Returns:
        The local path of the downloaded file
        (``os.path.join(target_directory, file)``).

    Raises:
        ftplib.all_errors: if connecting, logging in, changing directory
            or the transfer fails.
    """
    ftp_dir = "pub/current_release/ac_lists/"
    path = os.path.join(target_directory, file)
    # The host literal previously ended with a doubled quote, which was a
    # syntax error preventing the whole module from being imported.
    ftp = ftplib.FTP("ftp.nextprot.org")
    try:
        ftp.login("anonymous", "anonymous")
        ftp.cwd(ftp_dir)
        # Explicitly close the local file so the data is flushed to disk
        # before the caller opens or moves it.
        with open(path, 'wb') as local_file:
            ftp.retrbinary("RETR " + file, local_file.write)
    finally:
        # Always release the connection, even when the transfer failed.
        try:
            ftp.quit()
        except ftplib.all_errors:
            ftp.close()
    return (path)
550 nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]] 560 nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]]
551 writer.writerows(nextprot_file) 561 writer.writerows(nextprot_file)
552 562
553 for id in ids : 563 for id in ids :
554 query="https://api.nextprot.org/entry/"+id+".json" 564 query="https://api.nextprot.org/entry/"+id+".json"
565 try:
566 resp = requests.get(url=query)
567 except :
568 print ("wainting 10 minutes before trying again")
569 time.sleep(600)
570 resp = requests.get(url=query)
571 data = resp.json()
555 resp = requests.get(url=query) 572 resp = requests.get(url=query)
556 data = resp.json() 573 data = resp.json()
557 574
558 #get info from json dictionary 575 #get info from json dictionary
559 mass_mol = data["entry"]["isoforms"][0]["massAsString"] 576 mass_mol = data["entry"]["isoforms"][0]["massAsString"]