comparison data_manager/resource_building.py @ 1:0915249b8c4b draft

planemo upload commit 4dd1a2f7d196a1d2e70fab379a2c08367da0fe94-dirty
author proteore
date Wed, 06 Mar 2019 08:53:06 -0500
parents 0a26460d7366
children 2e34ee6d2d37
comparison
equal deleted inserted replaced
0:0a26460d7366 1:0915249b8c4b
37 ####################################################################################################### 37 #######################################################################################################
def HPA_sources(data_manager_dict, tissue, target_directory):
    """Download one Human Protein Atlas dump and register it in its data table.

    Parameters:
        data_manager_dict: object handed unchanged to ``_add_data_table_entry``
            together with the new entry.
        tissue: source selector; one of ``"HPA_normal_tissue"``,
            ``"HPA_pathology"`` or ``"HPA_full_atlas"``.
        target_directory: directory in which the downloaded ``.tsv`` is saved.

    Raises:
        ValueError: if ``tissue`` is not one of the supported selectors
            (previously this surfaced as an ``UnboundLocalError``).
    """
    # selector -> (display name, download URL, data table name)
    sources = {
        "HPA_normal_tissue": (
            "HPA normal tissue",
            "https://www.proteinatlas.org/download/normal_tissue.tsv.zip",
            "proteore_protein_atlas_normal_tissue",
        ),
        "HPA_pathology": (
            "HPA pathology",
            "https://www.proteinatlas.org/download/pathology.tsv.zip",
            "proteore_protein_atlas_tumor_tissue",
        ),
        "HPA_full_atlas": (
            "HPA full atlas",
            "https://www.proteinatlas.org/download/proteinatlas.tsv.zip",
            "proteore_protein_full_atlas",
        ),
    }
    if tissue not in sources:
        # Fail before any download is attempted, with a message naming the input.
        raise ValueError(
            "Unknown HPA source %r; expected one of: %s"
            % (tissue, ", ".join(sorted(sources)))
        )
    tissue_name, url, table = sources[tissue]

    # Read the clock once so the file name and the entry name carry the same
    # date even if the download runs across midnight.
    now = time.localtime()

    output_file = tissue + "_" + time.strftime("%d-%m-%Y", now) + ".tsv"
    path = os.path.join(target_directory, output_file)
    unzip(url, path)  # download and save file

    tissue_name = tissue_name + " " + time.strftime("%d/%m/%Y", now)
    # The id is the display name with spaces and slashes made identifier-safe.
    tissue_id = tissue_name.replace(" ", "_").replace("/", "-")

    data_table_entry = dict(id=tissue_id, name=tissue_name, tissue=tissue, value=path)
    _add_data_table_entry(data_manager_dict, data_table_entry, table)
57 61
58 62
59 ####################################################################################################### 63 #######################################################################################################
60 # 2. Peptide Atlas 64 # 2. Peptide Atlas
61 ####################################################################################################### 65 #######################################################################################################
226 name_dict={"Human" : "Homo sapiens", "Mouse" : "Mus musculus", "Rat" : "Rattus norvegicus"} 230 name_dict={"Human" : "Homo sapiens", "Mouse" : "Mus musculus", "Rat" : "Rattus norvegicus"}
227 name = species +" (" + name_dict[species]+" "+time.strftime("%d/%m/%Y")+")" 231 name = species +" (" + name_dict[species]+" "+time.strftime("%d/%m/%Y")+")"
228 id = species+"_id_mapping_"+ time.strftime("%d-%m-%Y") 232 id = species+"_id_mapping_"+ time.strftime("%d-%m-%Y")
229 233
230 data_table_entry = dict(id=id, name = name, value = species, path = path) 234 data_table_entry = dict(id=id, name = name, value = species, path = path)
231 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_id_mapping") 235 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_idmapping_"+species)
232 236
233 def download_from_uniprot_ftp(file,target_directory) : 237 def download_from_uniprot_ftp(file,target_directory) :
234 ftp_dir = "pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/" 238 ftp_dir = "pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/"
235 path = os.path.join(target_directory, file) 239 path = os.path.join(target_directory, file)
236 ftp = ftplib.FTP("ftp.uniprot.org") 240 ftp = ftplib.FTP("ftp.uniprot.org")