Mercurial > repos > proteore > proteore_data_manager
comparison data_manager/resource_building.py @ 12:f6afaa1f562c draft
planemo upload commit 39a9e2bf22b07beeca3fb77d86cda25820eb309c-dirty
| author | proteore |
|---|---|
| date | Wed, 17 Apr 2019 09:45:33 -0400 |
| parents | ac2cd728c40e |
| children | 098693479a9d |
comparison
Legend (row status): equal, deleted, inserted, replaced
| 11:ac2cd728c40e | 12:f6afaa1f562c |
|---|---|
| 499 ####################################################################################################### | 499 ####################################################################################################### |
| 500 | 500 |
| 501 def Build_nextprot_ref_file(data_manager_dict,target_directory): | 501 def Build_nextprot_ref_file(data_manager_dict,target_directory): |
| 502 nextprot_ids_file = "nextprot_ac_list_all.txt" | 502 nextprot_ids_file = "nextprot_ac_list_all.txt" |
| 503 ids = id_list_from_nextprot_ftp(nextprot_ids_file,target_directory) | 503 ids = id_list_from_nextprot_ftp(nextprot_ids_file,target_directory) |
| 504 | 504 |
| 505 output_file = 'nextprot_ref_'+ time.strftime("%d-%m-%Y") + ".tsv" | |
| 506 path = os.path.join(target_directory,output_file) | |
| 507 name = "neXtProt release "+time.strftime("%d-%m-%Y") | |
| 508 id = "nextprot_ref_"+time.strftime("%d-%m-%Y") | |
| 509 | |
| 510 | |
| 511 with open(path, 'w') as output: | |
| 512 writer = csv.writer(output,delimiter="\t") | |
| 513 | |
| 514 | |
| 515 | |
| 505 nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]] | 516 nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]] |
| 506 for id in ids : | 517 for id in ids : |
| 507 #print (id) | 518 #print (id) |
| 508 query="https://api.nextprot.org/entry/"+id+".json" | 519 query="https://api.nextprot.org/entry/"+id+".json" |
| 509 resp = requests.get(url=query) | 520 resp = requests.get(url=query) |
| 545 tm_domains = data['entry']['annotationsByCategory']["transmembrane-region"] | 556 tm_domains = data['entry']['annotationsByCategory']["transmembrane-region"] |
| 546 all_tm_domains = set() | 557 all_tm_domains = set() |
| 547 for tm in tm_domains : | 558 for tm in tm_domains : |
| 548 all_tm_domains.add(tm['cvTermName']) | 559 all_tm_domains.add(tm['cvTermName']) |
| 549 nb_domains+=1 | 560 nb_domains+=1 |
| 550 print "nb domains ++" | 561 # print "nb domains ++" |
| 551 print (nb_domains) | 562 # print (nb_domains) |
| 552 | 563 |
| 553 nextprot_file.append([id,mass_mol,str(seq_length),iso_elec_point,chr_loc,all_subcell_locs,all_diseases,str(nb_domains),protein_existence]) | 564 nextprot_file.append([id,mass_mol,str(seq_length),iso_elec_point,chr_loc,all_subcell_locs,all_diseases,str(nb_domains),protein_existence]) |
| 554 | |
| 555 output_file = 'nextprot_ref_'+ time.strftime("%d-%m-%Y") + ".tsv" | |
| 556 path = os.path.join(target_directory,output_file) | |
| 557 name = "neXtProt release "+time.strftime("%d-%m-%Y") | |
| 558 id = "nextprot_ref_"+time.strftime("%d-%m-%Y") | |
| 559 | |
| 560 with open(path, 'w') as output: | |
| 561 writer = csv.writer(output,delimiter="\t") | |
| 562 writer.writerows(nextprot_file) | 565 writer.writerows(nextprot_file) |
| 563 | 566 |
| 564 data_table_entry = dict(id=id, name = name, value = path) | 567 data_table_entry = dict(id=id, name = name, value = path) |
| 565 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_nextprot_ref") | 568 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_nextprot_ref") |
| 566 | 569 |
