comparison data_manager/resource_building.py @ 12:f6afaa1f562c draft

planemo upload commit 39a9e2bf22b07beeca3fb77d86cda25820eb309c-dirty
author proteore
date Wed, 17 Apr 2019 09:45:33 -0400
parents ac2cd728c40e
children 098693479a9d
comparison
equal deleted inserted replaced
11:ac2cd728c40e 12:f6afaa1f562c
499 ####################################################################################################### 499 #######################################################################################################
500 500
501 def Build_nextprot_ref_file(data_manager_dict,target_directory): 501 def Build_nextprot_ref_file(data_manager_dict,target_directory):
502 nextprot_ids_file = "nextprot_ac_list_all.txt" 502 nextprot_ids_file = "nextprot_ac_list_all.txt"
503 ids = id_list_from_nextprot_ftp(nextprot_ids_file,target_directory) 503 ids = id_list_from_nextprot_ftp(nextprot_ids_file,target_directory)
504 504
505 output_file = 'nextprot_ref_'+ time.strftime("%d-%m-%Y") + ".tsv"
506 path = os.path.join(target_directory,output_file)
507 name = "neXtProt release "+time.strftime("%d-%m-%Y")
508 id = "nextprot_ref_"+time.strftime("%d-%m-%Y")
509
510
511 with open(path, 'w') as output:
512 writer = csv.writer(output,delimiter="\t")
513
514
515
505 nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]] 516 nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]]
506 for id in ids : 517 for id in ids :
507 #print (id) 518 #print (id)
508 query="https://api.nextprot.org/entry/"+id+".json" 519 query="https://api.nextprot.org/entry/"+id+".json"
509 resp = requests.get(url=query) 520 resp = requests.get(url=query)
545 tm_domains = data['entry']['annotationsByCategory']["transmembrane-region"] 556 tm_domains = data['entry']['annotationsByCategory']["transmembrane-region"]
546 all_tm_domains = set() 557 all_tm_domains = set()
547 for tm in tm_domains : 558 for tm in tm_domains :
548 all_tm_domains.add(tm['cvTermName']) 559 all_tm_domains.add(tm['cvTermName'])
549 nb_domains+=1 560 nb_domains+=1
550 print "nb domains ++" 561 # print "nb domains ++"
551 print (nb_domains) 562 # print (nb_domains)
552 563
553 nextprot_file.append([id,mass_mol,str(seq_length),iso_elec_point,chr_loc,all_subcell_locs,all_diseases,str(nb_domains),protein_existence]) 564 nextprot_file.append([id,mass_mol,str(seq_length),iso_elec_point,chr_loc,all_subcell_locs,all_diseases,str(nb_domains),protein_existence])
554
555 output_file = 'nextprot_ref_'+ time.strftime("%d-%m-%Y") + ".tsv"
556 path = os.path.join(target_directory,output_file)
557 name = "neXtProt release "+time.strftime("%d-%m-%Y")
558 id = "nextprot_ref_"+time.strftime("%d-%m-%Y")
559
560 with open(path, 'w') as output:
561 writer = csv.writer(output,delimiter="\t")
562 writer.writerows(nextprot_file) 565 writer.writerows(nextprot_file)
563 566
564 data_table_entry = dict(id=id, name = name, value = path) 567 data_table_entry = dict(id=id, name = name, value = path)
565 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_nextprot_ref") 568 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_nextprot_ref")
566 569