diff id_converter.xml @ 16:dce5d78877d5 draft

planemo upload commit f2b3d1ff6bea930b2ce32c009e4d3de39a17edfb-dirty
author proteore
date Mon, 28 Jan 2019 10:49:59 -0500
parents 5b680c447d27
children 4550bb47e171
line wrap: on
line diff
--- a/id_converter.xml	Thu Dec 06 05:13:48 2018 -0500
+++ b/id_converter.xml	Mon Jan 28 10:49:59 2019 -0500
@@ -1,14 +1,13 @@
-<tool id="IDconverter" name="ID Converter" version="2018.12.05">
+<tool id="IDconverter" name="ID Converter" version="2019.01.25">
     <description>(Human, Mouse, Rat)
     </description>
     <requirements>
-      <requirement type="package" version="3.4.1">R</requirement>
     </requirements>
     <stdio>
         <exit_code range="1:" />
     </stdio>
-    <command interpreter="Rscript">
-        $__tool_directory__/id_converter.R
+    <command><![CDATA[
+        python $__tool_directory__/id_converter.py 
         --id_type="$species.idtypein"
         #if $input.ids == "text"
             --input="$input.txt"
@@ -23,15 +22,15 @@
         --output="$output"
         --ref_file="$__tool_directory__/${ filter( lambda x: str( x[2] ) == str( $species.mapping_file ), $__app__.tool_data_tables['proteore_id_mapping'].get_fields() )[0][-1] }"
         
-    </command>
+    ]]></command>
     <inputs>
         <conditional name="input" >
-            <param name="ids" type="select" label="Provide your identifiers" help="Copy/paste or ID list from a file (e.g. table)" >
+            <param name="ids" type="select" label="Enter IDs" help="Copy/paste or from a file (e.g. table)" >
                 <option value="text">Copy/paste your identifiers</option>
-                <option value="file" selected="true">Input file containing your identifiers</option>
+                <option value="file" selected="true">Input file containing IDs</option>
             </param>
             <when value="text" >
-                <param name="txt" type="text" label="Copy/paste your identifiers" help='IDs must be separated by tab, space or carriage return into the form field, for example: P31946 P62258' >
+                <param name="txt" type="text" label="Copy/paste IDs" help='IDs must be separated by tab, space or carriage return into the form field, for example: P31946 P62258' >
                     <sanitizer invalid_char="">
                         <valid initial="string.printable">
                             <remove value="&apos;"/>
@@ -47,28 +46,28 @@
                 </param>
             </when>
             <when value="file" >
-                <param name="file" type="data" format="txt,tabular" label="Choose a file that contains your list of IDs" help="" />
-                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?" />
-                <param name="ncol" type="text" value="c1" label="The column number of IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />                
+                <param name="file" type="data" format="txt,tabular" label="Select your file" help="" />
+                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
+                <param name="ncol" type="text" value="c1" label="Column number of IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />                
             </when>
         </conditional>
         <conditional name="species">
-            <param name="mapping_file" type="select" label="Select species for ID conversion" >
+            <param name="mapping_file" type="select" label="Species" >
                 <options from_data_table="proteore_id_mapping"/>
-                <option value="human_id_mapping"></option>
-                <option value="mouse_id_mapping"></option>
-                <option value="rat_id_mapping"></option>
+                <option value="Human"></option>
+                <option value="Mouse"></option>
+                <option value="Rat"></option>
             </param>
-            <when value="human_id_mapping">
-                <param name="idtypein" type="select" label="Select type/source of identifier of your list" help="Please see example of IDs in help section" >
+            <when value="Human">
+                <param name="idtypein" type="select" label="Type/source of IDs" optional="false" >
                     <option value="neXtProt" >neXtProt ID (e.g. NX_P31946)</option>
-                    <option value="UniProt-AC" selected="True" >Uniprot accession number (e.g. P31946)</option>
-                    <option value="UniProt-ID" >Uniprot ID (e.g 1433B_HUMAN)</option>
+                    <option value="UniProt-AC" selected="True" >UniProt accession number (e.g. P31946)</option>
+                    <option value="UniProt-ID" >UniProt ID (e.g 1433B_HUMAN)</option>
                     <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
                     <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
-                    <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
+                    <option value="GI" >GI (NCBI GI number) (e.g. 21328448)</option>
                     <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
-                    <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
+                    <option value="GO" >GO terms (Gene Ontology) ID (e.g. GO:0070062)</option>
                     <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
                     <option value="MIM" >OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
                     <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option>
@@ -79,14 +78,14 @@
                     <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option>
                     <option value="KEGG" >KEGG gene id (e.g. hsa:7529)</option>
                 </param>
-                <section name="idto" title="Target type of IDs" expanded="True" >
-                    <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" help="Please see example of IDs in help section" >
+                <section name="idto" title="Target type" expanded="True" >
+                    <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" optional="false" >
                         <option value="neXtProt" >neXtProt ID (e.g. NX_P31946)</option>
-                        <option value="UniProt-AC" >Uniprot accession number (e.g. P31946)</option>
-                        <option value="UniProt-ID" >Uniprot ID (e.g 1433B_HUMAN)</option>
+                        <option value="UniProt-AC" >UniProt accession number (e.g. P31946)</option>
+                        <option value="UniProt-ID" >UniProt ID (e.g 1433B_HUMAN)</option>
                         <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
                         <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
-                        <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
+                        <option value="GI" >GI (NCBI GI number) (e.g. 21328448)</option>
                         <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
                         <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
                         <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
@@ -101,13 +100,13 @@
                     </param>
                 </section>
             </when>
-            <when value="mouse_id_mapping">
-                <param name="idtypein" type="select" label="Select type/source of identifier of your list" help="Please see example of IDs in help section" >
-                    <option value="UniProt-AC" selected="True" >Uniprot accession number (e.g. P31946)</option>
-                    <option value="UniProt-ID" >Uniprot ID (e.g 1433B_HUMAN)</option>
+            <when value="Mouse">
+                <param name="idtypein" type="select" label="Type/source of IDs" optional="false" >
+                    <option value="UniProt-AC" selected="True" >UniProt accession number (e.g. P31946)</option>
+                    <option value="UniProt-ID" >UniProt ID (e.g 1433B_HUMAN)</option>
                     <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
                     <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
-                    <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
+                    <option value="GI" >GI (NCBI GI number) (e.g. 21328448)</option>
                     <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
                     <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
                     <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
@@ -121,12 +120,12 @@
                     <option value="KEGG" >KEGG gene id (e.g. hsa:7529)</option>
                 </param>
                 <section name="idto" title="Target type of IDs" expanded="True" >
-                    <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" help="Please see example of IDs in help section" >
-                        <option value="UniProt-AC" >Uniprot accession number (e.g. P31946)</option>
-                        <option value="UniProt-ID" >Uniprot ID (e.g 1433B_HUMAN)</option>
+                    <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" optional="false" >
+                        <option value="UniProt-AC" >UniProt accession number (e.g. P31946)</option>
+                        <option value="UniProt-ID" >UniProt ID (e.g 1433B_HUMAN)</option>
                         <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
                         <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
-                        <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
+                        <option value="GI" >GI (NCBI GI number) (e.g. 21328448)</option>
                         <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
                         <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
                         <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
@@ -140,13 +139,13 @@
                     </param>
                 </section>
             </when>
-            <when value="rat_id_mapping">
-                <param name="idtypein" type="select" label="Select type/source of identifier of your list" help="Please see example of IDs in help section" >
-                    <option value="UniProt-AC" selected="True" >Uniprot accession number (e.g. P31946)</option>
-                    <option value="UniProt-ID" >Uniprot ID (e.g 1433B_HUMAN)</option>
+            <when value="Rat">
+                <param name="idtypein" type="select" label="Select type/source of identifier of your list" optional="false" >
+                    <option value="UniProt-AC" selected="True" >UniProt accession number (e.g. P31946)</option>
+                    <option value="UniProt-ID" >UniProt ID (e.g 1433B_HUMAN)</option>
                     <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
                     <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
-                    <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
+                    <option value="GI" >GI (NCBI GI number) (e.g. 21328448)</option>
                     <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
                     <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
                     <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
@@ -160,12 +159,12 @@
                     <option value="KEGG" >KEGG gene id (e.g. hsa:7529)</option>
                 </param>
                 <section name="idto" title="Target type of IDs" expanded="True" >
-                    <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" help="Please see example of IDs in help section" >
-                        <option value="UniProt-AC" >Uniprot accession number (e.g. P31946)</option>
-                        <option value="UniProt-ID" >Uniprot ID (e.g 1433B_HUMAN)</option>
+                    <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" optional="false" >
+                        <option value="UniProt-AC" >UniProt accession number (e.g. P31946)</option>
+                        <option value="UniProt-ID" >UniProt ID (e.g 1433B_HUMAN)</option>
                         <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
                         <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
-                        <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
+                        <option value="GI" >GI (NCBI GI number) (e.g. 21328448)</option>
                         <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
                         <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
                         <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
@@ -203,27 +202,38 @@
         </test>
     </tests>
     <help><![CDATA[
-This tool converts a list of IDs to another identifier type, select the source and target type from the dropdown menus above (see below supported source and target types).
+**Description**
+
+This tool converts a list of identifiers (IDs) into another type of ID. Currently, the conversion of IDs applies to the following three species: Human (Homo sapiens), Mouse (Mus musculus) and Rat (Rattus norvegicus).
+Supported source and target types of IDs are listed below. After choosing the type of your input IDs, you can select one or more target types of ID to map to.
 
-After choosing the type of input IDs, you can choose one or more types of IDs you would like to map to. 
+-----
 
-If your input is a list of IDs or a single-column file, the tool will return a file containing the mapped IDs. Please, note that a "NA" is returned when there is no corresponding ID.
+**Input**
 
+If your input is a list of IDs (entered in copy/paste mode) or a single-column file, the tool will return a file containing the mapped IDs.
 If your input is a multiple-column file, the mapped IDs column(s) will be added at the end of the input file.
 
-**Available databases**
+.. class:: warningmark
+
+Accession numbers with a hyphen ("-"), which normally correspond to isoforms, are treated as their canonical form.
+For example, "Q71U36-2" will be treated as "Q71U36".
+
+-----
+
+**Parameters**
+
+Target types of IDs currently supported:
 
 * neXtProt ID (e.g. NX_P31946)
 
-* Uniprot accession number (e.g. P31946)
+* UniProt accession number (e.g. P31946)
 
-* Uniprot ID (e.g 1433B_HUMAN)
+* UniProt ID (e.g. 1433B_HUMAN)
 
 * Entrez gene ID (e.g. 7529)
 
-* RefSeq (NCBI) protein (e.g.  NP_003395.1)
+* RefSeq protein (NCBI) (e.g.  NP_003395.1)
 
-* GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)
+* GI (NCBI GI number) (e.g. 21328448)
 
 * Protein DataBank ID (e.g. 2BR9:A)
 
@@ -249,43 +259,28 @@
 
 .. class:: warningmark 
 
-Nextprot and OMIM are only available for Human.
+neXtProt and OMIM are only applicable to Human.
 
 -----
 
-.. class:: infomark
+**Output**
+
+A text file containing the selected type(s) of IDs (in addition to the original column(s) provided).
+Please note that "NA" is returned when there is no match between a source ID and the corresponding target ID.
+
+-----
+
+**Data sources (release date)**
 
 This tool converts human, mouse and rat IDs using the following source files:
 
-* HUMAN_9606_idmapping_selected.tab (Uniprot 23/10/2018)
-    Tarball downloaded from ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
-
-* HUMAN_9606_idmapping.dat (Uniprot 23/10/18)
-    Tarball downloaded from ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
-
-* nextprot_ac_list_all.txt (Nextprot released on 10/10/2018)
-    Downloaded from ftp://ftp.nextprot.org/pub/current_release/ac_lists/
-
-* MOUSE_10090_idmapping_selected.tab (Uniprot 23/10/2018)
-    Tarball downloaded from ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
-
-* MOUSE_10090_idmapping.dat (Uniprot 23/10/18)
-    Tarball downloaded from ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
-
-* RAT_10116_idmapping.dat (Uniprot 23/10/18)
-    Tarball downloaded from ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
-
-* RAT_10116_idmapping_selected.tab (Uniprot 23/10/18)
-    Tarball downloaded from ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
-
------
-
-.. class:: warningmark
-
-Accession numbers with an hyphen ("-") that normally correspond to isoform are considered 
-(and will therefore be treated) as similar to its canonical form.
-
-For example, "Q71U36-2" will be treated as "Q71U36".
+- **HUMAN_9606_idmapping_selected.tab (UniProt 23/10/2018)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
+- **HUMAN_9606_idmapping.dat (UniProt 23/10/2018)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
+- **nextprot_ac_list_all.txt (neXtProt released on 10/10/2018)**: ftp://ftp.nextprot.org/pub/current_release/ac_lists/
+- **MOUSE_10090_idmapping_selected.tab (UniProt 23/10/2018)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
+- **MOUSE_10090_idmapping.dat (UniProt 23/10/2018)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
+- **RAT_10116_idmapping.dat (UniProt 23/10/2018)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
+- **RAT_10116_idmapping_selected.tab (UniProt 23/10/2018)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
 
 -----
 
@@ -295,7 +290,7 @@
 
 David Christiany, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
 
-Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform
+Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR
 
 This work has been partially funded through the French National Agency for Research (ANR) IFB project.