changeset 30:a6cabd3ab71f draft

"planemo upload commit b89f1921a1759139b452c6fac1ad7ee01b6b633d-dirty"
author proteore
date Thu, 12 Dec 2019 09:26:42 +0000
parents 9a40b72414de
children faeeabb11a4d
files data_manager/resource_building.py data_manager/resource_building.xml
diffstat 2 files changed, 14 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/resource_building.py	Thu Jun 13 10:21:25 2019 -0400
+++ b/data_manager/resource_building.py	Thu Dec 12 09:26:42 2019 +0000
@@ -138,8 +138,8 @@
     files=["idmapping_selected.tab.gz","idmapping.dat.gz"]
 
     #header
-    if human : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","neXtProt","BioGrid","STRING","KEGG"]]
-    else : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","BioGrid","STRING","KEGG"]]
+    if human : tab = [["UniProt-AC","UniProt-AC_reviewed","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","neXtProt","BioGrid","STRING","KEGG",'Gene_Name']]
+    else : tab = [["UniProt-AC","UniProt-AC_reviewed","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","BioGrid","STRING","KEGG",'Gene_Name']]
 
     #get selected.tab and keep only ids of interest
     selected_tab_file=species_dict[species]+"_"+files[0]
@@ -147,7 +147,7 @@
     with gzip.open(tab_path,"rt") as select :
         tab_reader = csv.reader(select,delimiter="\t")
         for line in tab_reader :
-            tab.append([line[i] for i in [0,1,2,3,4,5,6,11,13,14,18,19,20]])
+            tab.append([line[0]]+[line[i] for i in [0,1,2,3,4,5,6,11,13,14,18,19,20]])
     os.remove(tab_path)
 
     #print("selected_tab ok")
@@ -162,10 +162,10 @@
         uniprot_reviewed_list = decoded_content.splitlines()
 
     for line in tab[1:]:
-        UniProtAC = line[0]
+        UniProtAC = line[1]
         if UniProtAC not in uniprot_reviewed_list :
-            line[0]=""
             line[1]=""
+            line[2]=""
 
     """
     Supplementary ID to get from HUMAN_9606_idmapping.dat :
@@ -173,8 +173,8 @@
     """
 
     #there's more id type for human
-    if human : ids = ['neXtProt','BioGrid','STRING','KEGG' ]   #ids to get from dat_file
-    else : ids = ['BioGrid','STRING','KEGG' ]
+    if human : ids = ['neXtProt','BioGrid','STRING','KEGG','Gene_Name' ]   #ids to get from dat_file
+    else : ids = ['BioGrid','STRING','KEGG','Gene_Name' ]
     unidict = {}
 
     #keep only ids of interest in dictionaries
@@ -206,15 +206,15 @@
                 nextprot = access_dictionary(unidict,uniprotID,'neXtProt')
                 if nextprot != '' : nextprot = clean_nextprot_id(nextprot,line[0])
                 line.extend([nextprot,access_dictionary(unidict,uniprotID,'BioGrid'),access_dictionary(unidict,uniprotID,'STRING'),
-                        access_dictionary(unidict,uniprotID,'KEGG')])
+                        access_dictionary(unidict,uniprotID,'KEGG'),access_dictionary(unidict,uniprotID,'Gene_Name')])
             else :
-                line.extend(["","","",""])
+                line.extend(["","","","",""])
         else :
             if uniprotID in unidict :
                 line.extend([access_dictionary(unidict,uniprotID,'BioGrid'),access_dictionary(unidict,uniprotID,'STRING'),
-                        access_dictionary(unidict,uniprotID,'KEGG')])
+                        access_dictionary(unidict,uniprotID,'KEGG'),access_dictionary(unidict,uniprotID,'Gene_Name')])
             else :
-                line.extend(["","",""])
+                line.extend(["","","",""])
 
     #print ("tab ok")
 
@@ -230,9 +230,9 @@
         #add missing nextprot ID
         for line in tab[1:] : 
             uniprotID=line[0]
-            nextprotID=line[13]
+            nextprotID=line[14]
             if uniprotID in next_dict and (nextprotID == '' or (nextprotID != "NX_"+uniprotID and next_dict[uniprotID] == "NX_"+uniprotID)) :
-                line[13]=next_dict[uniprotID]
+                line[14]=next_dict[uniprotID]
 
     output_file = species+"_id_mapping_"+ time.strftime("%d-%m-%Y") + ".tsv"
     path = os.path.join(target_directory,output_file)
--- a/data_manager/resource_building.xml	Thu Jun 13 10:21:25 2019 -0400
+++ b/data_manager/resource_building.xml	Thu Dec 12 09:26:42 2019 +0000
@@ -1,4 +1,4 @@
-<tool id="data_manager_proteore" name="Get source files for proteore tools" version="2019.06.13" tool_type="manage_data">
+<tool id="data_manager_proteore" name="Get source files for proteore tools" version="2019.12.12" tool_type="manage_data">
 <description>
 to create or update reference files for proteore tools
 </description>