Mercurial > repos > proteore > proteore_id_converter

diff id_converter.xml @ 0:c83e45dab5c9 draft
planemo upload commit fa7e64b910f0f92c5f3e160536ace963c7ab0cba-dirty
author: proteore
date: Tue, 21 Aug 2018 09:06:21 -0400
children: e4d9a4ab1930
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/id_converter.xml	Tue Aug 21 09:06:21 2018 -0400
@@ -0,0 +1,246 @@
+<tool id="IDconverter" name="ID Converter" version="0.1.0"> <!-- Galaxy tool wrapper around the id_converter_UniProt.R script -->
+    <description>convert public database identifiers
+    </description>
+    <requirements> <!-- The only declared dependency is the R package, version 3.4.1 -->
+      <requirement type="package" version="3.4.1">R</requirement>
+    </requirements>
+    <stdio> <!-- One rule on the exit code, range "1:"; no level attribute is set, so the default level applies -->
+        <exit_code range="1:" />
+    </stdio>
+    <!-- Calls the R script explicitly (the "interpreter" attribute is deprecated); values are single-quoted for the shell, and list() keeps the data table lookup subscriptable on Python 3. -->
+    <command><![CDATA[
+        Rscript '$__tool_directory__/id_converter_UniProt.R'
+        --id_type='$species.idtypein'
+        #if $input.ids == "text"
+            --input='$input.txt'
+            --input_type='list'
+        #else
+            --input='$input.file'
+            --column_number='$input.ncol'
+            --header='$input.header'
+            --input_type='file'
+        #end if
+        --target_ids='$species.idto.idtypeout'
+        --output='$output'
+        --ref_file='${ list( filter( lambda x: str( x[0] ) == str( $species.mapping_file ), $__app__.tool_data_tables['id_mapping_file'].get_fields() ) )[0][-1] }'
+    ]]></command>
+    <inputs>
+        <conditional name="input" > <!-- Chooses between pasted identifiers ("text") and an identifier column taken from a dataset ("file", the default) -->
+            <param name="ids" type="select" label="Provide your identifiers" help="Copy/paste or ID list from a file (e.g. table)" >
+                <option value="text">Copy/paste your identifiers</option>
+                <option value="file" selected="true">Input file containing your identifiers</option>
+            </param>
+            <when value="text" > <!-- Pasted list: single quotes are mapped to __sq__; spaces, tabs, CR and LF are mapped to nothing -->
+                <param name="txt" type="text" label="Copy/paste your identifiers" help='IDs must be separated by "," into the form field, for example: P31946,P62258' >
+                    <sanitizer invalid_char="">
+                        <valid initial="string.printable">
+                            <remove value="&apos;"/>
+                        </valid>
+                        <mapping initial="none">
+                            <add source="&apos;" target="__sq__"/>
+                            <add source="&#x20;" target=""/>
+                            <add source="&#xA;" target=""/>
+                            <add source="&#xD;" target=""/>
+                            <add source="&#x9;" target=""/>
+                        </mapping>
+                    </sanitizer>
+                </param>
+            </when>
+            <when value="file" > <!-- Dataset input: the header flag and the column label (e.g. c1) are forwarded to the script as header and column_number -->
+                <param name="file" type="data" format="txt,tabular" label="Choose a file that contains your list of IDs" help="" />
+                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?" />
+                <param name="ncol" type="text" value="c1" label="The column number of IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />                
+            </when>
+        </conditional>
+        <conditional name="species"> <!-- Species choice decides which identifier types are offered; the selected value is also used by the command to look up the reference file -->
+            <param name="mapping_file" type="select" label="Select species for ID conversion" > <!-- NOTE(review): options come from the id_mapping_file data table AND two static options without labels; confirm how Galaxy combines them -->
+                <options from_data_table="id_mapping_file"/>
+                <option value="human_id_mapping"></option>
+                <option value="mouse_id_mapping"></option>
+            </param>
+            <when value="human_id_mapping"> <!-- Human branch: the only one that offers neXtProt as source or target type -->
+                <param name="idtypein" type="select" label="Select type/source of identifier of your list" help="Please see example of IDs in help section" >
+                    <option value="neXtProt" >neXtProt ID (e.g. NX_P31946)</option>
+                    <option value="UniProt.AC" selected="True" >Uniprot accession number (e.g. P31946)</option>
+                    <option value="UniProt.ID" >Uniprot ID (e.g 1433B_HUMAN)</option>
+                    <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
+                    <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
+                    <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
+                    <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
+                    <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
+                    <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
+                    <option value="MIM" >OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
+                    <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option>
+                    <option value="Ensembl" >Ensembl gene ID (e.g. ENSG00000166913)</option>
+                    <option value="Ensembl_TRS" >Ensembl transcript ID (e.g. ENST00000353703)</option>
+                    <option value="Ensembl_PRO" >Ensembl protein ID (e.g. ENSP00000300161)</option>
+                    <option value="BioGrid" >BioGrid (e.g. 113361)</option>
+                    <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option>
+                    <option value="KEGG" >KEGG (e.g. hsa:7529)</option>
+                </param>
+                <section name="idto" title="Target type of IDs" expanded="True" > <!-- Several target types may be ticked at once (multiple select shown as checkboxes) -->
+                    <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" help="Please see example of IDs in help section" >
+                        <option value="neXtProt" >neXtProt ID (e.g. NX_P31946)</option>
+                        <option value="UniProt.AC" >Uniprot accession number (e.g. P31946)</option>
+                        <option value="UniProt.ID" >Uniprot ID (e.g 1433B_HUMAN)</option>
+                        <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
+                        <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
+                        <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
+                        <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
+                        <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
+                        <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
+                        <option value="MIM" >OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
+                        <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option>
+                        <option value="Ensembl" >Ensembl gene ID (e.g. ENSG00000166913)</option>
+                        <option value="Ensembl_TRS" >Ensembl transcript ID (e.g. ENST00000353703)</option>
+                        <option value="Ensembl_PRO" >Ensembl protein ID (e.g. ENSP00000300161)</option>
+                        <option value="BioGrid" >BioGrid (e.g. 113361)</option>
+                        <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option>
+                        <option value="KEGG" >KEGG (e.g. hsa:7529)</option>
+                    </param>
+                </section>
+            </when>
+            <when value="mouse_id_mapping"> <!-- Mouse branch: same identifier types as human except neXtProt. NOTE(review): the labels still show human example IDs such as 1433B_HUMAN; confirm this is intended -->
+                <param name="idtypein" type="select" label="Select type/source of identifier of your list" help="Please see example of IDs in help section" >
+                    <option value="UniProt.AC" selected="True" >Uniprot accession number (e.g. P31946)</option>
+                    <option value="UniProt.ID" >Uniprot ID (e.g 1433B_HUMAN)</option>
+                    <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
+                    <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
+                    <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
+                    <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
+                    <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
+                    <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
+                    <option value="MIM" >OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
+                    <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option>
+                    <option value="Ensembl" >Ensembl gene ID (e.g. ENSG00000166913)</option>
+                    <option value="Ensembl_TRS" >Ensembl transcript ID (e.g. ENST00000353703)</option>
+                    <option value="Ensembl_PRO" >Ensembl protein ID (e.g. ENSP00000300161)</option>
+                    <option value="BioGrid" >BioGrid (e.g. 113361)</option>
+                    <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option>
+                    <option value="KEGG" >KEGG (e.g. hsa:7529)</option>
+                </param>
+                <section name="idto" title="Target type of IDs" expanded="True" > <!-- Several target types may be ticked at once (multiple select shown as checkboxes) -->
+                    <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" help="Please see example of IDs in help section" >
+                        <option value="UniProt.AC" >Uniprot accession number (e.g. P31946)</option>
+                        <option value="UniProt.ID" >Uniprot ID (e.g 1433B_HUMAN)</option>
+                        <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
+                        <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
+                        <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
+                        <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
+                        <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
+                        <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
+                        <option value="MIM" >OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
+                        <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option>
+                        <option value="Ensembl" >Ensembl gene ID (e.g. ENSG00000166913)</option>
+                        <option value="Ensembl_TRS" >Ensembl transcript ID (e.g. ENST00000353703)</option>
+                        <option value="Ensembl_PRO" >Ensembl protein ID (e.g. ENSP00000300161)</option>
+                        <option value="BioGrid" >BioGrid (e.g. 113361)</option>
+                        <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option>
+                        <option value="KEGG" >KEGG (e.g. hsa:7529)</option>
+                    </param>
+                </section>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs> <!-- Single tabular dataset; the command passes it to the script as the output argument -->
+        <data name="output" format="tabular" />
+    </outputs>
+    <tests> <!-- One functional test: human UniProt accessions read from column c1 of a file with a header, mapped to five target types -->
+        <test>
+            <conditional name="input" >
+                <param name="ids" value="file"/>
+                <param name="file" value="FKW_Lacombe_et_al_2017_OK.txt" />
+                <param name="header" value="true" />
+                <param name="ncol" value="c1"/>
+            </conditional>
+            <conditional name="species">
+                <param name="mapping_file" value="human_id_mapping"/>
+                <param name="idtypein" value="UniProt.AC"/>
+                <section name="idto">
+                    <param name="idtypeout" value="neXtProt,UniProt.ID,GeneID,MIM,Ensembl" />
+                </section>
+            </conditional>
+            <output name="output" value="ID_Converted_FKW_Lacombe_et_al_2017_OK.txt" /> <!-- Expected result file the produced output is compared with -->
+        </test>
+    </tests>
+    <help><![CDATA[
+This tool converts a list of IDs to another identifier type. Select the source and target types from the dropdown menus above (the supported source and target types are listed below).
+
+After choosing the type of input IDs, you can choose one or more types of IDs you would like to map to. 
+
+If your input is a list of IDs or a single-column file, the tool will return a file containing the mapped IDs. Please note that "NA" is returned when there is no corresponding ID.
+
+If your input is a multiple-column file, the mapped IDs column(s) will be added at the end of the input file.
+
+**Available databases**
+
+* neXtProt ID (e.g. NX_P31946)
+
+* Uniprot accession number (e.g. P31946)
+
+* Uniprot ID (e.g. 1433B_HUMAN)
+
+* Entrez gene ID (e.g. 7529)
+
+* RefSeq (NCBI) protein (e.g.  NP_003395.1)
+
+* GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)
+
+* Protein DataBank ID (e.g. 2BR9:A)
+
+* GOterms (Gene Ontology) ID (e.g. GO:0070062)
+
+* Protein Information Resource ID (e.g. S34755)
+
+* OMIM (Online Mendelian Inheritance in Man database) ID (e.g. 601289)
+
+* Unigene ID (e.g. Hs.643544)
+
+* Ensembl gene ID (e.g. ENSG00000166913)
+
+* Ensembl transcript ID (e.g. ENST00000353703)
+
+* Ensembl protein ID (e.g. ENSP00000300161)
+
+-----
+
+.. class:: infomark
+
+This tool converts human IDs using the following source files:
+
+* HUMAN_9606_idmapping_selected.tab (Uniprot 02/07/2018)
+    Tarball downloaded from ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
+
+* HUMAN_9606_idmapping.dat (Uniprot 02/07/2018)
+    Tarball downloaded from ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
+
+* nextprot_ac_list_all.txt (Nextprot released on 17/01/2018)
+    Downloaded from ftp://ftp.nextprot.org/pub/current_release/ac_lists/
+
+-----
+
+.. class:: warningmark
+
+Accession numbers containing a hyphen ("-"), which normally correspond to isoforms, are considered 
+(and will therefore be treated) as identical to their canonical form.
+
+For example, "Q71U36-2" will be treated as "Q71U36".
+
+-----
+
+.. class:: infomark
+
+**Authors**
+
+T.P. Lien Nguyen, David Christiany, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+
+Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform
+
+This work has been partially funded through the French National Agency for Research (ANR) IFB project.
+
+Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
+ 
+    ]]></help>
+    <citations> <!-- No citations are declared -->
+    </citations>
+</tool>
author	proteore
date	Tue, 21 Aug 2018 09:06:21 -0400
parents
children	e4d9a4ab1930