Mercurial > repos > proteore > proteore_data_manager
comparison data_manager/resource_building.py @ 28:808c8493ed4f draft
planemo upload commit 5c2c274361c0daceae1f678eca0e6c0d5b4ba4f7-dirty
| author | proteore |
|---|---|
| date | Mon, 27 May 2019 07:49:19 -0400 |
| parents | 9a400ce8e4e6 |
| children | 9a40b72414de |
comparison
equal
deleted
inserted
replaced
| 27:9a400ce8e4e6 | 28:808c8493ed4f |
|---|---|
| 136 human = species == "Human" | 136 human = species == "Human" |
| 137 species_dict = { "Human" : "HUMAN_9606", "Mouse" : "MOUSE_10090", "Rat" : "RAT_10116" } | 137 species_dict = { "Human" : "HUMAN_9606", "Mouse" : "MOUSE_10090", "Rat" : "RAT_10116" } |
| 138 files=["idmapping_selected.tab.gz","idmapping.dat.gz"] | 138 files=["idmapping_selected.tab.gz","idmapping.dat.gz"] |
| 139 | 139 |
| 140 #header | 140 #header |
| 141 if human : tab = [["UniProt-AC","UniProt-AC_reviewed","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","neXtProt","BioGrid","STRING","KEGG"]] | 141 if human : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","neXtProt","BioGrid","STRING","KEGG"]] |
| 142 else : tab = [["UniProt-AC","UniProt-AC_reviewed","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","BioGrid","STRING","KEGG"]] | 142 else : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","BioGrid","STRING","KEGG"]] |
| 143 | 143 |
| 144 #get selected.tab and keep only ids of interest | 144 #get selected.tab and keep only ids of interest |
| 145 selected_tab_file=species_dict[species]+"_"+files[0] | 145 selected_tab_file=species_dict[species]+"_"+files[0] |
| 146 tab_path = download_from_uniprot_ftp(selected_tab_file,target_directory) | 146 tab_path = download_from_uniprot_ftp(selected_tab_file,target_directory) |
| 147 with gzip.open(tab_path,"rt") as select : | 147 with gzip.open(tab_path,"rt") as select : |
| 161 decoded_content = download.content.decode('utf-8') | 161 decoded_content = download.content.decode('utf-8') |
| 162 uniprot_reviewed_list = decoded_content.splitlines() | 162 uniprot_reviewed_list = decoded_content.splitlines() |
| 163 | 163 |
| 164 for line in tab[1:]: | 164 for line in tab[1:]: |
| 165 UniProtAC = line[0] | 165 UniProtAC = line[0] |
| 166 if UniProtAC in uniprot_reviewed_list : | 166 if UniProtAC not in uniprot_reviewed_list : |
| 167 line.insert(1,UniProtAC) | 167 line[0]="NA" |
| 168 else : | |
| 169 line.insert(1,"") | |
| 170 | 168 |
| 171 """ | 169 """ |
| 172 Supplementary ID to get from HUMAN_9606_idmapping.dat : | 170 Supplementary ID to get from HUMAN_9606_idmapping.dat : |
| 173 -NextProt,BioGrid,STRING,KEGG | 171 -NextProt,BioGrid,STRING,KEGG |
| 174 """ | 172 """ |
