comparison data_manager/resource_building.py @ 28:808c8493ed4f draft

planemo upload commit 5c2c274361c0daceae1f678eca0e6c0d5b4ba4f7-dirty
author proteore
date Mon, 27 May 2019 07:49:19 -0400
parents 9a400ce8e4e6
children 9a40b72414de
comparison
legend: equal | deleted | inserted | replaced
left column: 27:9a400ce8e4e6 | right column: 28:808c8493ed4f
136 human = species == "Human" 136 human = species == "Human"
137 species_dict = { "Human" : "HUMAN_9606", "Mouse" : "MOUSE_10090", "Rat" : "RAT_10116" } 137 species_dict = { "Human" : "HUMAN_9606", "Mouse" : "MOUSE_10090", "Rat" : "RAT_10116" }
138 files=["idmapping_selected.tab.gz","idmapping.dat.gz"] 138 files=["idmapping_selected.tab.gz","idmapping.dat.gz"]
139 139
140 #header 140 #header
141 if human : tab = [["UniProt-AC","UniProt-AC_reviewed","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","neXtProt","BioGrid","STRING","KEGG"]] 141 if human : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","neXtProt","BioGrid","STRING","KEGG"]]
142 else : tab = [["UniProt-AC","UniProt-AC_reviewed","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","BioGrid","STRING","KEGG"]] 142 else : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","BioGrid","STRING","KEGG"]]
143 143
144 #get selected.tab and keep only ids of interest 144 #get selected.tab and keep only ids of interest
145 selected_tab_file=species_dict[species]+"_"+files[0] 145 selected_tab_file=species_dict[species]+"_"+files[0]
146 tab_path = download_from_uniprot_ftp(selected_tab_file,target_directory) 146 tab_path = download_from_uniprot_ftp(selected_tab_file,target_directory)
147 with gzip.open(tab_path,"rt") as select : 147 with gzip.open(tab_path,"rt") as select :
161 decoded_content = download.content.decode('utf-8') 161 decoded_content = download.content.decode('utf-8')
162 uniprot_reviewed_list = decoded_content.splitlines() 162 uniprot_reviewed_list = decoded_content.splitlines()
163 163
164 for line in tab[1:]: 164 for line in tab[1:]:
165 UniProtAC = line[0] 165 UniProtAC = line[0]
166 if UniProtAC in uniprot_reviewed_list : 166 if UniProtAC not in uniprot_reviewed_list :
167 line.insert(1,UniProtAC) 167 line[0]="NA"
168 else :
169 line.insert(1,"")
170 168
171 """ 169 """
172 Supplementary ID to get from HUMAN_9606_idmapping.dat : 170 Supplementary ID to get from HUMAN_9606_idmapping.dat :
173 -NextProt,BioGrid,STRING,KEGG 171 -NextProt,BioGrid,STRING,KEGG
174 """ 172 """