Mercurial > repos > proteore > proteore_data_manager
comparison data_manager/resource_building.py @ 1:0915249b8c4b draft
planemo upload commit 4dd1a2f7d196a1d2e70fab379a2c08367da0fe94-dirty
| author | proteore |
|---|---|
| date | Wed, 06 Mar 2019 08:53:06 -0500 |
| parents | 0a26460d7366 |
| children | 2e34ee6d2d37 |
comparison
equal
deleted
inserted
replaced
| 0:0a26460d7366 | 1:0915249b8c4b |
|---|---|
| 37 ####################################################################################################### | 37 ####################################################################################################### |
| 38 def HPA_sources(data_manager_dict, tissue, target_directory): | 38 def HPA_sources(data_manager_dict, tissue, target_directory): |
| 39 if tissue == "HPA_normal_tissue": | 39 if tissue == "HPA_normal_tissue": |
| 40 tissue_name = "HPA normal tissue" | 40 tissue_name = "HPA normal tissue" |
| 41 url = "https://www.proteinatlas.org/download/normal_tissue.tsv.zip" | 41 url = "https://www.proteinatlas.org/download/normal_tissue.tsv.zip" |
| | 42 table = "proteore_protein_atlas_normal_tissue" |
| 42 elif tissue == "HPA_pathology": | 43 elif tissue == "HPA_pathology": |
| 43 tissue_name = "HPA pathology" | 44 tissue_name = "HPA pathology" |
| 44 url = "https://www.proteinatlas.org/download/pathology.tsv.zip" | 45 url = "https://www.proteinatlas.org/download/pathology.tsv.zip" |
| | 46 table = "proteore_protein_atlas_tumor_tissue" |
| 45 elif tissue == "HPA_full_atlas": | 47 elif tissue == "HPA_full_atlas": |
| 46 tissue_name = "HPA full atlas" | 48 tissue_name = "HPA full atlas" |
| 47 url = "https://www.proteinatlas.org/download/proteinatlas.tsv.zip" | 49 url = "https://www.proteinatlas.org/download/proteinatlas.tsv.zip" |
| | 50 table = "proteore_protein_full_atlas" |
| 48 | 51 |
| 49 output_file = tissue +"_"+ time.strftime("%d-%m-%Y") + ".tsv" | 52 output_file = tissue +"_"+ time.strftime("%d-%m-%Y") + ".tsv" |
| 50 path = os.path.join(target_directory, output_file) | 53 path = os.path.join(target_directory, output_file) |
| 51 unzip(url, path) #download and save file | 54 unzip(url, path) #download and save file |
| 52 tissue_name = tissue_name + " " + time.strftime("%d/%m/%Y") | 55 tissue_name = tissue_name + " " + time.strftime("%d/%m/%Y") |
| 53 tissue_id = tissue_name.replace(" ","_").replace("/","-") | 56 tissue_id = tissue_name.replace(" ","_").replace("/","-") |
| 54 | 57 |
| | 58 |
| 55 data_table_entry = dict(id=tissue_id, name = tissue_name, tissue = tissue, value = path) | 59 data_table_entry = dict(id=tissue_id, name = tissue_name, tissue = tissue, value = path) |
| 56 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_protein_atlas") | 60 _add_data_table_entry(data_manager_dict, data_table_entry, table) |
| 57 | 61 |
| 58 | 62 |
| 59 ####################################################################################################### | 63 ####################################################################################################### |
| 60 # 2. Peptide Atlas | 64 # 2. Peptide Atlas |
| 61 ####################################################################################################### | 65 ####################################################################################################### |
| 226 name_dict={"Human" : "Homo sapiens", "Mouse" : "Mus musculus", "Rat" : "Rattus norvegicus"} | 230 name_dict={"Human" : "Homo sapiens", "Mouse" : "Mus musculus", "Rat" : "Rattus norvegicus"} |
| 227 name = species +" (" + name_dict[species]+" "+time.strftime("%d/%m/%Y")+")" | 231 name = species +" (" + name_dict[species]+" "+time.strftime("%d/%m/%Y")+")" |
| 228 id = species+"_id_mapping_"+ time.strftime("%d-%m-%Y") | 232 id = species+"_id_mapping_"+ time.strftime("%d-%m-%Y") |
| 229 | 233 |
| 230 data_table_entry = dict(id=id, name = name, value = species, path = path) | 234 data_table_entry = dict(id=id, name = name, value = species, path = path) |
| 231 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_id_mapping") | 235 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_idmapping_"+species) |
| 232 | 236 |
| 233 def download_from_uniprot_ftp(file,target_directory) : | 237 def download_from_uniprot_ftp(file,target_directory) : |
| 234 ftp_dir = "pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/" | 238 ftp_dir = "pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/" |
| 235 path = os.path.join(target_directory, file) | 239 path = os.path.join(target_directory, file) |
| 236 ftp = ftplib.FTP("ftp.uniprot.org") | 240 ftp = ftplib.FTP("ftp.uniprot.org") |
