comparison data_manager/resource_building.py @ 51:55b12ec24a9f draft

"planemo upload commit 59b014e9f6e2d668cbd7c4844b10db3d59baefd8-dirty"
author proteore
date Fri, 05 Jun 2020 13:24:48 +0000
parents 7a6d8aafb269
children bb552aa4b9ac
comparison
equal deleted inserted replaced
50:7a6d8aafb269 51:55b12ec24a9f
130 return False 130 return False
131 131
132 ####################################################################################################### 132 #######################################################################################################
133 # 3. ID mapping file 133 # 3. ID mapping file
134 ####################################################################################################### 134 #######################################################################################################
135 import ftplib, gzip 135 import ftplib, gzip
136 from io import StringIO
136 csv.field_size_limit(sys.maxsize) # to handle big files 137 csv.field_size_limit(sys.maxsize) # to handle big files
137 138
138 def id_mapping_sources (data_manager_dict, species, target_directory, tool_data_path) : 139 def id_mapping_sources (data_manager_dict, species, target_directory, tool_data_path) :
139 140
140 human = species == "Human" 141 human = species == "Human"
278 ftp.retrbinary("RETR " + file, open(path, 'wb').write) 279 ftp.retrbinary("RETR " + file, open(path, 'wb').write)
279 ftp.quit() 280 ftp.quit()
280 return (path) 281 return (path)
281 282
282 def id_list_from_nextprot_ftp(file,target_directory) : 283 def id_list_from_nextprot_ftp(file,target_directory) :
283 ftp_dir = "pub/current_release/ac_lists/"
284 path = os.path.join(target_directory, file) 284 path = os.path.join(target_directory, file)
285 ftp = ftplib.FTP("ftp.nextprot.org") 285 ftp = ftplib.FTP("ftp.nextprot.org")
286 ftp.login("anonymous", "anonymous") 286 ftp.login("anonymous", "anonymous")
287 ftp.cwd(ftp_dir) 287 ftp.cwd("pub/current_release/ac_lists/")
288 ftp.retrbinary("RETR " + file, open(path, 'wb').write) 288 r = StringIO()
289 ftp.retrlines("RETR " + file, lambda line: r.write(line + '\n'))
289 ftp.quit() 290 ftp.quit()
290 291 r.seek(0)
291 return (path) 292 ids = r.readlines()
293 ids = [id.strip('\n') for id in ids if id != '']
294 return (ids)
292 295
293 #return '' if there's no value in a dictionary, avoid error 296 #return '' if there's no value in a dictionary, avoid error
294 def access_dictionary (dico,key1,key2) : 297 def access_dictionary (dico,key1,key2) :
295 if key1 in dico : 298 if key1 in dico :
296 if key2 in dico[key1] : 299 if key2 in dico[key1] :
547 550
548 nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]] 551 nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]]
549 writer.writerows(nextprot_file) 552 writer.writerows(nextprot_file)
550 553
551 for id in ids : 554 for id in ids :
552 #print (id)
553 query="https://api.nextprot.org/entry/"+id+".json" 555 query="https://api.nextprot.org/entry/"+id+".json"
554 resp = requests.get(url=query) 556 resp = requests.get(url=query)
555 data = resp.json() 557 data = resp.json()
556 558
557 #get info from json dictionary 559 #get info from json dictionary