proteore_data_manager: data_manager/resource

comparison data_manager/resource_building.py @ 6:f281a1eb83d6 draft

planemo upload commit 5df487b88ce2146c4be8e1d9f419006583185f6a-dirty

author	proteore
date	Mon, 11 Mar 2019 04:30:28 -0400
parents	429e7481c392
children	d5badf9de1b0

comparison

equal deleted inserted replaced

-:429e7481c392
+:f281a1eb83d6
 json.dump(dico, handle, sort_keys=True)
 data_table_entry = dict(id=id, name = name, species = species, value = path)
 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_"+interactome+"_dictionaries")
+#######################################################################################################
+# 5. nextprot (add protein features)
+#######################################################################################################
def Build_nextprot_ref_file(target_directory):
    """Build the neXtProt reference TSV and register it as a data table entry.

    For every identifier returned by ``id_list_from_nextprot_ftp`` the
    corresponding entry is fetched from ``https://api.nextprot.org`` and one
    row is collected with: molecular weight, sequence length and isoelectric
    point of the first listed isoform, chromosome of the first chromosomal
    location, subcellular locations, diseases, number of helical domain
    regions and the protein existence level ("PE<level>").

    The rows are written to ``nextprot_ref_<dd-mm-YYYY>.tsv`` inside
    ``target_directory`` and the file is registered in the
    ``proteore_nextprot_ref`` data table.

    Args:
        target_directory: directory passed to ``id_list_from_nextprot_ftp``
            and in which the TSV file is written.

    Raises:
        requests.exceptions.RequestException: if an entry cannot be fetched.
        KeyError / IndexError: if an entry lacks one of the expected fields.
    """
    nextprot_ids_file = "nextprot_ac_list_all.txt"
    ids = id_list_from_nextprot_ftp(nextprot_ids_file, target_directory)

    # First row is the header of the TSV file.
    nextprot_file = [["NextprotID", "MW", "SeqLength", "IsoPoint", "Chr",
                      "SubcellLocations", "Diseases", "TMDomains",
                      "ProteinExistence"]]

    for nextprot_id in ids:
        entry = _fetch_nextprot_entry(nextprot_id)

        # Only the first isoform / chromosomal location listed is reported.
        first_isoform = entry["isoforms"][0]
        mass_mol = first_isoform["massAsString"]
        seq_length = first_isoform["sequenceLength"]
        iso_elec_point = first_isoform["isoelectricPointAsString"]
        chr_loc = entry["chromosomalLocations"][0]["chromosome"]
        protein_existence = "PE" + str(entry["overview"]["proteinExistence"]["level"])

        annotations = entry["annotationsByCategory"]
        all_subcell_locs = _join_cv_terms(annotations.get("subcellular-location"))
        all_diseases = _join_cv_terms(annotations.get("disease"))
        nb_domains = _count_helical_domains(annotations.get("domain"))

        nextprot_file.append([nextprot_id, mass_mol, str(seq_length),
                              iso_elec_point, chr_loc, all_subcell_locs,
                              all_diseases, str(nb_domains), protein_existence])

    # Format the date once so that the file name, the entry name and the
    # entry id always agree, even when the run crosses midnight.
    release_date = time.strftime("%d-%m-%Y")
    output_file = "nextprot_ref_" + release_date + ".tsv"
    path = os.path.join(target_directory, output_file)
    name = "neXtProt release " + release_date
    entry_id = "nextprot_ref_" + release_date

    with open(path, 'w') as output:
        writer = csv.writer(output, delimiter="\t")
        writer.writerows(nextprot_file)

    data_table_entry = dict(id=entry_id, name=name, value=path)
    # NOTE(review): data_manager_dict is neither a parameter nor assigned in
    # this function, so it has to exist at module scope when this line runs;
    # confirm against the caller, otherwise this raises NameError.
    _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_nextprot_ref")


def _fetch_nextprot_entry(nextprot_id, timeout=120):
    """Return the "entry" object of the neXtProt API JSON for one identifier."""
    query = "https://api.nextprot.org/entry/" + nextprot_id + ".json"
    # A timeout keeps one stalled connection from blocking the whole build.
    resp = requests.get(url=query, timeout=timeout)
    # Fail with the HTTP error instead of an unrelated KeyError further down.
    resp.raise_for_status()
    return resp.json()["entry"]


def _join_cv_terms(annotations):
    """Return the distinct non-empty cvTermName values joined by ";", or "NA"."""
    terms = set()
    for annotation in annotations or []:
        term = annotation["cvTermName"]
        if term is not None and term != "":
            terms.add(term)
    if not terms:
        return "NA"
    # Sorted so that the generated file does not depend on set ordering.
    return ";".join(sorted(terms))


def _count_helical_domains(annotations):
    """Count "region structure" properties whose value is "Helical"."""
    nb_domains = 0
    for annotation in annotations or []:
        # Properties belong to each annotation, not to the annotation list.
        for prop in annotation.get("properties") or []:
            if prop["name"] == "region structure" and prop["value"] == "Helical":
                nb_domains += 1
    return nb_domains
 #######################################################################################################
 # Main function
 #######################################################################################################
 def main():
 parser.add_argument("--id_mapping", metavar = ("ID_MAPPING_SPECIES"))
 parser.add_argument("--interactome", metavar = ("PPI"))
 parser.add_argument("--species")
 parser.add_argument("--date")
 parser.add_argument("-o", "--output")
+parser.add_argument("--database")
 args = parser.parse_args()
 data_manager_dict = {}
 # Extract json file params
 filename = args.output
 except NameError:
 interactome=None
 species=None
 if interactome is not None and species is not None:
 PPI_ref_files(data_manager_dict, species, interactome, target_directory)
+## Build the nextprot reference file (used for adding protein features)
+try:
+database=args.database
+except NameError:
+database=None
+if database is not None :
+Build_nextprot_ref_file(target_directory)
 #save info to json file
 filename = args.output
 open(filename, 'wb').write(to_json_string(data_manager_dict))
 if __name__ == "__main__":

Mercurial > repos > proteore > proteore_data_manager

comparison data_manager/resource_building.py @ 6:f281a1eb83d6 draft