Mercurial > repos > proteore > proteore_data_manager
changeset 30:a6cabd3ab71f draft
"planemo upload commit b89f1921a1759139b452c6fac1ad7ee01b6b633d-dirty"
| author | proteore |
|---|---|
| date | Thu, 12 Dec 2019 09:26:42 +0000 |
| parents | 9a40b72414de |
| children | faeeabb11a4d |
| files | data_manager/resource_building.py data_manager/resource_building.xml |
| diffstat | 2 files changed, 14 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/resource_building.py Thu Jun 13 10:21:25 2019 -0400
+++ b/data_manager/resource_building.py Thu Dec 12 09:26:42 2019 +0000
@@ -138,8 +138,8 @@
     files=["idmapping_selected.tab.gz","idmapping.dat.gz"]
 
     #header
-    if human : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","neXtProt","BioGrid","STRING","KEGG"]]
-    else : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","BioGrid","STRING","KEGG"]]
+    if human : tab = [["UniProt-AC","UniProt-AC_reviewed","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","neXtProt","BioGrid","STRING","KEGG",'Gene_Name']]
+    else : tab = [["UniProt-AC","UniProt-AC_reviewed","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","BioGrid","STRING","KEGG",'Gene_Name']]
 
     #get selected.tab and keep only ids of interest
     selected_tab_file=species_dict[species]+"_"+files[0]
@@ -147,7 +147,7 @@
     with gzip.open(tab_path,"rt") as select :
         tab_reader = csv.reader(select,delimiter="\t")
         for line in tab_reader :
-            tab.append([line[i] for i in [0,1,2,3,4,5,6,11,13,14,18,19,20]])
+            tab.append([line[0]]+[line[i] for i in [0,1,2,3,4,5,6,11,13,14,18,19,20]])
     os.remove(tab_path)
 
     #print("selected_tab ok")
@@ -162,10 +162,10 @@
     uniprot_reviewed_list = decoded_content.splitlines()
 
     for line in tab[1:]:
-        UniProtAC = line[0]
+        UniProtAC = line[1]
         if UniProtAC not in uniprot_reviewed_list :
-            line[0]=""
             line[1]=""
+            line[2]=""
 
     """
     Supplementary ID to get from HUMAN_9606_idmapping.dat :
@@ -173,8 +173,8 @@
     """
 
     #there's more id type for human
-    if human : ids = ['neXtProt','BioGrid','STRING','KEGG' ] #ids to get from dat_file
-    else : ids = ['BioGrid','STRING','KEGG' ]
+    if human : ids = ['neXtProt','BioGrid','STRING','KEGG','Gene_Name' ] #ids to get from dat_file
+    else : ids = ['BioGrid','STRING','KEGG','Gene_Name' ]
     unidict = {}
 
     #keep only ids of interest in dictionaries
@@ -206,15 +206,15 @@
                 nextprot = access_dictionary(unidict,uniprotID,'neXtProt')
                 if nextprot != '' : nextprot = clean_nextprot_id(nextprot,line[0])
                 line.extend([nextprot,access_dictionary(unidict,uniprotID,'BioGrid'),access_dictionary(unidict,uniprotID,'STRING'),
-                        access_dictionary(unidict,uniprotID,'KEGG')])
+                        access_dictionary(unidict,uniprotID,'KEGG'),access_dictionary(unidict,uniprotID,'Gene_Name')])
             else :
-                line.extend(["","","",""])
+                line.extend(["","","","",""])
         else :
             if uniprotID in unidict :
                 line.extend([access_dictionary(unidict,uniprotID,'BioGrid'),access_dictionary(unidict,uniprotID,'STRING'),
-                        access_dictionary(unidict,uniprotID,'KEGG')])
+                        access_dictionary(unidict,uniprotID,'KEGG'),access_dictionary(unidict,uniprotID,'Gene_Name')])
             else :
-                line.extend(["","",""])
+                line.extend(["","","",""])
 
     #print ("tab ok")
 
@@ -230,9 +230,9 @@
         #add missing nextprot ID
         for line in tab[1:] :
             uniprotID=line[0]
-            nextprotID=line[13]
+            nextprotID=line[14]
             if uniprotID in next_dict and (nextprotID == '' or (nextprotID != "NX_"+uniprotID and next_dict[uniprotID] == "NX_"+uniprotID)) :
-                line[13]=next_dict[uniprotID]
+                line[14]=next_dict[uniprotID]
 
     output_file = species+"_id_mapping_"+ time.strftime("%d-%m-%Y") + ".tsv"
     path = os.path.join(target_directory,output_file)
--- a/data_manager/resource_building.xml Thu Jun 13 10:21:25 2019 -0400
+++ b/data_manager/resource_building.xml Thu Dec 12 09:26:42 2019 +0000
@@ -1,4 +1,4 @@
-<tool id="data_manager_proteore" name="Get source files for proteore tools" version="2019.06.13" tool_type="manage_data">
+<tool id="data_manager_proteore" name="Get source files for proteore tools" version="2019.12.12" tool_type="manage_data">
 <description>
 to create or update reference files for proteore tools
 </description>
